Python convert_to_unicode 예제들, pytorch_pretrained_bert.tokenization.convert_to_unicode Python 예제들

예제 #1

0

파일 보기

파일: run_classifier.py 프로젝트: rlebras/pytorch-pretrained-BERT

    def _create_examples(self, records, set_type):
        """Creates examples for the training and dev sets."""
        examples = []
        for (i, record) in enumerate(records):
            guid = "%s-%s-%s" % (set_type, record['InputStoryid'],
                                 record['ending'])

            beginning = record['InputSentence1']
            ending = record['InputSentence5']

            option1 = record['RandomMiddleSentenceQuiz1']
            option2 = record['RandomMiddleSentenceQuiz2']
            option3 = record['RandomMiddleSentenceQuiz3']

            answer = int(record['AnswerRightEnding']) - 1

            option1_context = convert_to_unicode(' '.join([beginning,
                                                           option1]))
            option2_context = convert_to_unicode(' '.join([beginning,
                                                           option2]))
            option3_context = convert_to_unicode(' '.join([beginning,
                                                           option3]))

            label = convert_to_unicode(str(answer))

            text_a = [option1_context, option2_context, option3_context]
            text_b = [ending, ending, ending]

            examples.append(
                InputExampleWithList(guid=guid,
                                     text_a=text_a,
                                     text_b=text_b,
                                     label=label))
        return examples

예제 #2

0

파일 보기

파일: run_classifier.py 프로젝트: rlebras/pytorch-pretrained-BERT

    def _create_examples(self, lines, set_type):
        """Creates examples for the training and dev sets."""
        examples = []
        for (i, line) in enumerate(lines):
            if i == 0:
                continue
            guid = "%s-%s" % (set_type, i)
            sentence = convert_to_unicode(line[1]).replace("\"", "")
            conj = convert_to_unicode(line[2])

            idx = sentence.index(conj)

            context = sentence[:idx + len(conj)]
            option_str = sentence[idx + len(conj):].strip()

            name1 = convert_to_unicode(line[3])
            name2 = convert_to_unicode(line[4])

            option1 = option_str.replace("_", name1)
            option2 = option_str.replace("_", name2)

            text_a = [context, context]
            text_b = [option1, option2]

            label = convert_to_unicode(line[5])

            examples.append(
                InputExampleWithList(guid=guid,
                                     text_a=text_a,
                                     text_b=text_b,
                                     label=label))
        return examples

예제 #3

0

파일 보기

파일: run_classifier_mod.py 프로젝트: ceroper/TwitterHateSpeechDetection

 def _create_examples(self, lines, set_type):
     """Creates examples for the training and dev sets."""
     examples = []
     for (i, line) in enumerate(lines):
         guid = "%s-%s" % (set_type, i)
         text_a = convert_to_unicode(line[3])
         label = convert_to_unicode(line[1])
         examples.append(
             InputExample(guid=guid, text_a=text_a, text_b=None, label=label))
     return examples

예제 #4

0

파일 보기

 def _create_examples(self, lines, set_type):
     """Creates examples for the training and dev sets."""
     examples = []
     for i, line in lines.iterrows():
         guid = "%s-%s" % (set_type, i)
         text_a = convert_to_unicode(line['text'])
         label = convert_to_unicode(line['label'])
         examples.append(
             InputExample(guid=guid,
                          text_a=text_a,
                          text_b=None,
                          label=label))
     return examples

예제 #5

0

파일 보기

파일: run_classifier_mod.py 프로젝트: ceroper/TwitterHateSpeechDetection

 def _create_examples(self, lines, set_type):
     """Creates examples for the training and dev sets."""
     examples = []
     for (i, line) in enumerate(lines):
         if i == 0:
             continue
         guid = "%s-%s" % (set_type, convert_to_unicode(line[0]))
         text_a = convert_to_unicode(line[8])
         text_b = convert_to_unicode(line[9])
         label = convert_to_unicode(line[-1])
         examples.append(
             InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label))
     return examples

예제 #6

0

파일 보기

파일: run_classifier.py 프로젝트: rlebras/pytorch-pretrained-BERT

    def _create_examples(self, records, set_type):
        """Creates examples for the training and dev sets."""
        examples = []
        num_fields = len([
            x for x in list(records[0].keys())
            if x.startswith('RandomMiddleSentenceQuiz')
        ])
        self._labels = [str(idx) for idx in range(1, num_fields + 1)]
        for (i, record) in enumerate(records):
            guid = "%s-%s-%s" % (set_type, record['InputStoryid'],
                                 record['ending'])

            beginning = record['InputSentence1']
            ending = record['InputSentence5']

            text_a = []
            text_b = []
            for idx in range(1, num_fields + 1):
                text_a.append(beginning + " " +
                              record["RandomMiddleSentenceQuiz" + str(idx)])
                text_b.append(ending + " Because , " +
                              record['CSK' + str(idx)])

            answer = int(record['AnswerRightEnding']) - 1
            label = convert_to_unicode(str(answer))

            examples.append(
                InputExampleWithListFourFields(guid=guid,
                                               text_a=text_a,
                                               text_b=text_b,
                                               text_c=None,
                                               text_d=None,
                                               label=label))
        return examples

예제 #7

0

파일 보기

파일: main_bert_hier.py 프로젝트: yana-xuyan/dialogue-emotion

def read_examples(data, no_label=False):
    """Build three-sentence ``InputExample`` objects from parallel sequences.

    Args:
        data: Sequence of parallel iterables that is unpacked into ``zip``.
            With ``no_label`` set it must hold ``(ids, sentences)``;
            otherwise ``(ids, sentences, labels)``. Each sentences entry is
            indexed at positions 0, 1 and 2.
        no_label: When true, every example gets the fixed label ``'others'``.

    Returns:
        A list of ``InputExample`` objects in input order.
    """

    def _make(identifier, turns, raw_label):
        # Single construction path shared by both branches below.
        return InputExample(unique_id=convert_to_unicode(str(identifier)),
                            text_a=convert_to_unicode(turns[0]),
                            text_b=convert_to_unicode(turns[1]),
                            text_c=convert_to_unicode(turns[2]),
                            label=convert_to_unicode(raw_label))

    if no_label:
        return [_make(identifier, turns, 'others')
                for identifier, turns in zip(*data)]
    return [_make(identifier, turns, raw_label)
            for identifier, turns, raw_label in zip(*data)]

예제 #8

0

파일 보기

파일: run_classifier.py 프로젝트: rlebras/pytorch-pretrained-BERT

    def _create_examples(self, records, set_type):
        """Creates examples for the training and dev sets."""
        examples = []
        for (i, record) in enumerate(records):

            guid = "%s-%s" % (set_type, i)

            beginning = record['InputSentence1']
            ending = record['InputSentence5']
            middle = record['RandomMiddleSentenceQuiz1']
            label = str(record['AnswerRightEnding'])

            text_a = convert_to_unicode(beginning)
            text_b = convert_to_unicode(middle + " " + ending)

            examples.append(
                InputExample(guid=guid,
                             text_a=text_a,
                             text_b=text_b,
                             label=label))
        return examples

예제 #9

0

파일 보기

    def _create_examples(self, data, set_type):
        """Creates examples for the training and dev sets."""
        examples = []
        for (i, d) in enumerate(data):
            for k in range(3):
                if data[i][2 + k] == data[i][5]:
                    answer = str(k)

            label = tokenization.convert_to_unicode(answer)

            for k in range(3):
                guid = "%s-%s-%s" % (set_type, i, k)
                text_a = tokenization.convert_to_unicode(data[i][0])
                text_b = tokenization.convert_to_unicode(data[i][k + 2])
                text_c = tokenization.convert_to_unicode(data[i][1])
                examples.append(
                    InputExample(guid=guid,
                                 text_a=text_a,
                                 text_b=text_b,
                                 label=label,
                                 text_c=text_c))

        return examples

예제 #10

0

파일 보기

파일: run_classifier.py 프로젝트: mihirkale815/bertie

def parse_wsdm_fake_news_row(row):
    """Extract the guid, four text fields and label from one row.

    Args:
        row: Indexable row. Column 0 is the guid, columns 3-6 are the four
            text fields and the last column is the label.

    Returns:
        The tuple ``(guid, ch_text_a, ch_text_b, en_text_a, en_text_b,
        label)`` with every element passed through ``convert_to_unicode``.
    """
    guid = "%s" % (convert_to_unicode(row[0]))
    ch_text_a, ch_text_b, en_text_a, en_text_b, label = [
        convert_to_unicode(row[column]) for column in (3, 4, 5, 6, -1)
    ]
    return guid, ch_text_a, ch_text_b, en_text_a, en_text_b, label

예제 #11

0

파일 보기

def convert_text_to_examples(text):
    """Turn an iterable of lines into ``InputExample`` objects.

    Each line is converted to unicode and stripped. A line of the form
    ``"<a> ||| <b>"`` yields ``text_a=<a>`` and ``text_b=<b>``; any other line
    becomes ``text_a`` with ``text_b`` left as ``None``.

    Args:
        text: Iterable of lines.

    Returns:
        A list of ``InputExample`` objects whose ``unique_id`` is the
        zero-based position of the line.
    """
    examples = []
    for unique_id, raw_line in enumerate(text):
        stripped = tokenization.convert_to_unicode(raw_line).strip()
        pair = re.match(r"^(.*) \|\|\| (.*)$", stripped)
        if pair is not None:
            text_a, text_b = pair.group(1), pair.group(2)
        else:
            text_a, text_b = stripped, None
        examples.append(
            InputExample(unique_id=unique_id, text_a=text_a, text_b=text_b))
    return examples

예제 #12

0

파일 보기

파일: bert_ds.py 프로젝트: zldoty/MLTS

def read_examples_df(df, col):
    """Turn one column of ``df`` into a list of ``InputExample`` objects.

    Rows are visited in ``df.iterrows()`` order. A cell of the form
    ``"<a> ||| <b>"`` (after stripping) yields ``text_a=<a>`` and
    ``text_b=<b>``; any other cell becomes ``text_a`` with ``text_b`` left as
    ``None``.

    Args:
        df: Object exposing ``iterrows()`` (presumably a pandas DataFrame).
        col: Key used to read the text cell from each row.

    Returns:
        A list of ``InputExample`` objects with consecutive ``unique_id``
        values starting at 0.
    """
    examples = []
    for _, row in df.iterrows():
        cell = convert_to_unicode(row[col])
        if not cell:
            # NOTE(review): an empty cell ends processing entirely, so all
            # later rows are dropped - confirm this is intended.
            break
        cell = cell.strip()
        pair = re.match(r"^(.*) \|\|\| (.*)$", cell)
        if pair is not None:
            text_a, text_b = pair.group(1), pair.group(2)
        else:
            text_a, text_b = cell, None
        # Every processed row is appended, so the next id equals the number
        # of examples collected so far.
        examples.append(
            InputExample(unique_id=len(examples),
                         text_a=text_a,
                         text_b=text_b))
    return examples

예제 #13

0

파일 보기

파일: extract_features.py 프로젝트: nweir127/pytorch-pretrained-BERT

def read_examples(input_file):
    """Read a list of `InputExample`s from a UTF-8 text file.

    Every line of the file (including blank ones) produces one example. A
    stripped line of the form ``"<a> ||| <b>"`` yields ``text_a=<a>`` and
    ``text_b=<b>``; any other line becomes ``text_a`` with ``text_b`` left as
    ``None``.

    Args:
        input_file: Path of the text file to read.

    Returns:
        A list of ``InputExample`` objects whose ``unique_id`` is the
        zero-based line number.
    """
    examples = []
    # Pin the encoding: without it the locale default is used, so the same
    # file can decode differently (or fail) from one machine to the next.
    with open(input_file, "r", encoding="utf-8") as reader:
        # Iterating the file stops at EOF, replacing the manual readline loop.
        for unique_id, raw_line in enumerate(reader):
            line = convert_to_unicode(raw_line).strip()
            m = re.match(r"^(.*) \|\|\| (.*)$", line)
            if m is None:
                text_a, text_b = line, None
            else:
                text_a, text_b = m.group(1), m.group(2)
            examples.append(
                InputExample(unique_id=unique_id, text_a=text_a,
                             text_b=text_b))
    return examples

예제 #14

0

파일 보기

파일: ExtractFeatures.py 프로젝트: manas234das/Contextual_Qtn_Ans_using_BERT

def read_examples(list_of_rows):
    """Build ``InputExample`` objects from rows of one or two text elements.

    Each row is either ``[line1]`` or ``[line1, line2]``. The row length is
    checked before any conversion, and the converted values are stored in a
    new list, so the caller's rows are left untouched and tuples are accepted
    as rows.

    Args:
        list_of_rows: Iterable of sized, iterable rows.

    Returns:
        A list of ``InputExample`` objects whose ``unique_id`` is the
        zero-based row position; ``text_b`` is ``None`` for one-element rows.

    Raises:
        ValueError: If a row is empty or has more than two elements.
    """
    examples = []
    for unique_id, row in enumerate(list_of_rows):
        if not 1 <= len(row) <= 2:
            raise ValueError("Atleast 1 element is required inside the row and not more than two elements are valid")

        # Convert into a fresh list instead of overwriting the caller's row.
        converted = [convert_to_unicode(cell) for cell in row]

        text_a = converted[0]
        text_b = converted[1] if len(converted) == 2 else None
        examples.append(
            InputExample(unique_id=unique_id, text_a=text_a, text_b=text_b))
    return examples