Exemple #1
0
def generate_output_multiclass(model,
                               input,
                               gold,
                               doc_list_sub,
                               processed_path,
                               output_pred_path,
                               pred=True,
                               data_folder="",
                               format_abbre=".TimeNorm.system.completed.xml"):
    """Predict spans for ``input`` and write one output file per document.

    Predictions are obtained from
    ``output.make_prediction_function_multiclass``. For every entry of the
    returned ``classes`` the class sequence is turned into spans with
    ``output.found_location_with_constraint`` and ``output.loc2span``. The
    spans of each sentence are then shifted by that sentence's offset (read
    from ``<processed_path>/<doc>/<doc>_sent``), handed to ``span2xmlfiles``
    and written to ``<output_pred_path>/<doc>/<doc><format_abbre>``.

    Args:
        model: Model forwarded to the prediction helper.
        input: Model input forwarded to the prediction helper. The name
            shadows the builtin but is kept for caller compatibility.
        gold: Unused; kept for caller compatibility.
        doc_list_sub: Document names. Sentences are consumed with a single
            running index across all documents, so the order here has to
            match the sentence order of ``input``.
        processed_path: Directory holding the per-document ``_sent`` files.
        output_pred_path: Directory receiving predictions and output files.
        pred: When truthy, additionally save classes and probabilities
            under file names suffixed with ``data_folder``.
        data_folder: Suffix appended to the saved prediction file names.
        format_abbre: Suffix appended to every per-document output path.
    """
    non_operator = read.textfile2list(non_operator_path)
    operator = read.textfile2list(operator_path)
    # labels_index[i] is the label list used to decode classes[i]; the last
    # two entries are the same operator list.
    labels_index = [non_operator, operator, operator]
    classes, probs = output.make_prediction_function_multiclass(
        input, model, output_pred_path)
    if pred:
        np.save(output_pred_path + "/y_predict_classes" + data_folder, classes)
        read.savein_pickle(output_pred_path + "/y_predict_proba" + data_folder,
                           probs)

    spans = []
    int2labels = []
    for index, predicted in enumerate(classes):
        class_loc = output.found_location_with_constraint(predicted)
        spans.append(
            output.loc2span(class_loc, probs[index], post_process=False))

        # Labels are numbered from 1. The mapping is inverted and then
        # inverted back, exactly as before, so the lookup ends up with the
        # same orientation read.counterList2Dict produced (entries sharing a
        # value collapse into one).
        numbered = read.counterList2Dict(
            list(enumerate(labels_index[index], 1)))
        inverted = {value: key for key, value in numbered.items()}
        int2labels.append({value: key for key, value in inverted.items()})

    # NOTE(review): presumably the number of marker positions prepended to
    # every sentence before prediction - confirm against the preprocessing.
    n_marks = 3
    # Running sentence index over all documents.
    sent_index = 0

    for doc_name in doc_list_sub:
        sent_spans = read.readfrom_json(
            os.path.join(processed_path, doc_name, doc_name + "_sent"))
        data_span = []
        for sent_span in sent_spans:
            for index, output_spans in enumerate(spans):
                span_list = output_spans[sent_index]
                # Skip sentences without spans: either a list whose first
                # entry is empty or (previously an IndexError) an empty list.
                if len(span_list) < 1 or len(span_list[0]) < 1:
                    continue
                id2label = int2labels[index]
                for posi_start, posi_end, label in span_list:
                    # sent_span[1] is added to turn sentence-relative
                    # positions into document positions (presumably the
                    # sentence start offset - confirm).
                    data_span.append([
                        posi_start - n_marks + sent_span[1],
                        posi_end - n_marks + sent_span[1],
                        id2label[label],
                    ])
            sent_index += 1
        data = span2xmlfiles(data_span, doc_name)
        output_path = os.path.join(output_pred_path, doc_name, doc_name)
        read.create_folder(output_path)
        data.to_file(output_path + format_abbre)
Exemple #2
0
def make_prediction_function_multiclass(x_data,
                                        model,
                                        output_path,
                                        version=""):
    """Run ``model.predict`` on ``x_data`` and persist the results.

    The raw prediction is converted to classes, then both are saved under
    ``output_path`` (created if missing) as ``y_predict_classes<version>``
    (via ``np.save``) and ``y_predict_proba<version>`` (via
    ``read.savein_pickle``).

    Args:
        x_data: Input forwarded to ``model.predict``.
        model: Object exposing ``predict(x, batch_size=...)``.
        output_path: Directory receiving the two saved files.
        version: Suffix appended to both file names.

    Returns:
        Tuple ``(classes, y_predict)`` of the decoded classes and the raw
        prediction.
    """
    y_predict = model.predict(x_data, batch_size=32)
    # NOTE(review): if model.predict returns a single array instead of a list
    # of arrays, len() is the size of its first axis, so the multi-output
    # branch is taken then as well - confirm this test is what is intended.
    if len(y_predict) >= 2:
        classes = prob2classes_multiclasses_multioutput(y_predict)
    else:
        classes = prob2classes_multiclasses(y_predict)

    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(output_path, exist_ok=True)
    np.save(output_path + "/y_predict_classes" + version, classes)
    read.savein_pickle(output_path + "/y_predict_proba" + version, y_predict)

    return classes, y_predict
Exemple #3
0
def generate_output_multiclass(model, input, gold, doc_list_sub,
                               processed_path, output_pred_path, pred=True,
                               data_folder="",
                               format_abbre=".TimeNorm.system.completed.xml"):
    """Predict spans for ``input`` and write one output file per document.

    Predictions come from ``output.make_prediction_function_multiclass``.
    Each entry of the returned ``classes`` is converted to spans through
    ``output.found_location_with_constraint`` and ``output.loc2span``; the
    spans of every sentence are shifted by the sentence offset read from
    ``<processed_path>/<doc>/<doc>_sent``, passed to ``span2xmlfiles`` and
    written to ``<output_pred_path>/<doc>/<doc><format_abbre>``.

    NOTE(review): a function with this name but a different signature
    (leading ``sent_len``, no ``gold``) is defined again further down; if
    both live in the same module the later definition replaces this one -
    confirm which is intended.

    Args:
        model: Model forwarded to the prediction helper.
        input: Model input forwarded to the prediction helper (the name
            shadows the builtin; kept for caller compatibility).
        gold: Unused; kept for caller compatibility.
        doc_list_sub: Document names, ordered like the sentences of
            ``input`` (one running sentence index spans all documents).
        processed_path: Directory holding the per-document ``_sent`` files.
        output_pred_path: Directory receiving predictions and output files.
        pred: When truthy, also save classes and probabilities under file
            names suffixed with ``data_folder``.
        data_folder: Suffix appended to the saved prediction file names.
        format_abbre: Suffix appended to every per-document output path.
    """
    non_operator = read.textfile2list(non_operator_path)
    operator = read.textfile2list(operator_path)
    # labels_index[i] decodes classes[i]; entries 1 and 2 are the same list.
    labels_index = [non_operator, operator, operator]
    classes, probs = output.make_prediction_function_multiclass(
        input, model, output_pred_path)
    if pred:
        np.save(output_pred_path + "/y_predict_classes" + data_folder, classes)
        read.savein_pickle(
            output_pred_path + "/y_predict_proba" + data_folder, probs)

    spans = []
    int2labels = []
    for index, predicted in enumerate(classes):
        class_loc = output.found_location_with_constraint(predicted)
        spans.append(
            output.loc2span(class_loc, probs[index], post_process=False))

        # Labels are numbered from 1; the mapping is inverted twice, as in
        # the original code, so it keeps the orientation produced by
        # read.counterList2Dict while entries sharing a value collapse.
        numbered = read.counterList2Dict(
            list(enumerate(labels_index[index], 1)))
        inverted = {value: key for key, value in numbered.items()}
        int2labels.append({value: key for key, value in inverted.items()})

    # NOTE(review): presumably the count of marker positions prepended to
    # each sentence before prediction - confirm against the preprocessing.
    n_marks = 3
    # Running sentence index over all documents.
    sent_index = 0

    for doc_name in doc_list_sub:
        sent_spans = read.readfrom_json(
            os.path.join(processed_path, doc_name, doc_name + "_sent"))
        data_span = []
        for sent_span in sent_spans:
            for index, output_spans in enumerate(spans):
                span_list = output_spans[sent_index]
                # No spans for this sentence: an empty first entry or
                # (previously an IndexError) an entirely empty list.
                if len(span_list) < 1 or len(span_list[0]) < 1:
                    continue
                id2label = int2labels[index]
                for posi_start, posi_end, label in span_list:
                    data_span.append([
                        posi_start - n_marks + sent_span[1],
                        posi_end - n_marks + sent_span[1],
                        id2label[label],
                    ])
            sent_index += 1
        data = span2xmlfiles(data_span, doc_name)
        output_path = os.path.join(output_pred_path, doc_name, doc_name)
        read.create_folder(output_path)
        data.to_file(output_path + format_abbre)
def generate_output_multiclass(sent_len, model, input, doc_list_sub,
                               processed_path, output_pred_path, pred=True,
                               data_folder="",
                               format_abbre=".TimeNorm.system.completed.xml"):
    """Predict spans for ``input`` and write one output file per document.

    Predictions come from ``output.make_prediction_function_multiclass``.
    Each entry of the returned ``classes`` is converted to spans through
    ``output.found_location_with_constraint`` (which also receives
    ``sent_len``) and ``output.loc2span``. The spans of every sentence are
    shifted by the sentence offset read from ``<processed_path>/<doc>_sent``,
    passed to ``span2xmlfiles`` and written to
    ``<output_pred_path>/<doc>/<doc><format_abbre>``. Intermediate values
    are printed to stdout for debugging.

    Args:
        sent_len: Forwarded to ``output.found_location_with_constraint``.
        model: Model forwarded to the prediction helper.
        input: Model input forwarded to the prediction helper (the name
            shadows the builtin; kept for caller compatibility).
        doc_list_sub: Document names, ordered like the sentences of
            ``input`` (one running sentence index spans all documents).
        processed_path: Directory holding the ``<doc>_sent`` files.
        output_pred_path: Directory receiving predictions and output files.
        pred: When truthy, also save classes and probabilities under file
            names suffixed with ``data_folder``.
        data_folder: Suffix appended to the saved prediction file names.
        format_abbre: Suffix appended to every per-document output path.
    """
    non_operator = read.textfile2list(non_operator_path)
    print('non_operator')
    print(non_operator)
    operator = read.textfile2list(operator_path)
    print('operator')
    print(operator)
    # labels_index[i] decodes classes[i]; entries 1 and 2 are the same list.
    labels_index = [non_operator, operator, operator]
    print('labels_index')
    print(labels_index)
    classes, probs = output.make_prediction_function_multiclass(
        input, model, output_pred_path)
    print('sent_len')
    print(sent_len)
    print('classes, probs - ')
    print(classes)
    if pred:
        np.save(output_pred_path + "/y_predict_classes" + data_folder, classes)
        read.savein_pickle(
            output_pred_path + "/y_predict_proba" + data_folder, probs)

    spans = []
    int2labels = []
    for index, predicted in enumerate(classes):
        class_loc = output.found_location_with_constraint(predicted, sent_len)
        print('class_loc')
        print(class_loc)
        span = output.loc2span(class_loc, probs[index], post_process=False)
        print('span')
        print(span)
        spans.append(span)

        # Labels are numbered from 1; the mapping is inverted twice, as in
        # the original code, so it keeps the orientation produced by
        # read.counterList2Dict while entries sharing a value collapse.
        numbered = read.counterList2Dict(
            list(enumerate(labels_index[index], 1)))
        inverted = {value: key for key, value in numbered.items()}
        int2labels.append({value: key for key, value in inverted.items()})

    # NOTE(review): presumably the count of marker positions prepended to
    # each sentence before prediction - confirm against the preprocessing.
    n_marks = 3
    # Running sentence index over all documents.
    sent_index = 0

    for doc_name in doc_list_sub:
        sent_path = os.path.join(processed_path, doc_name + "_sent")
        # The debug prints below used logging-style arguments, which print()
        # does not interpolate; they are formatted explicitly now.
        print('HERE %s' % (doc_name,))
        print(sent_path)
        sent_spans = read.readfrom_json(sent_path)
        print('sent_spans %s' % (sent_spans,))
        data_span = []
        for sent_span in sent_spans:
            print('classes - ', range(len(classes)))
            for index, output_spans in enumerate(spans):
                span_list = output_spans[sent_index]
                # No spans for this sentence: an empty first entry or
                # (previously an IndexError) an entirely empty list.
                if len(span_list) < 1 or len(span_list[0]) < 1:
                    continue
                id2label = int2labels[index]
                for posi_start, posi_end, label in span_list:
                    print('posi_start:%s posi_end:%s, label:%s'
                          % (posi_start, posi_end, label))
                    data_span.append([
                        posi_start - n_marks + sent_span[1],
                        posi_end - n_marks + sent_span[1],
                        id2label[label],
                    ])
            sent_index += 1
        print('data_span - ', data_span)
        data = span2xmlfiles(data_span, doc_name)
        print('data %s' % (data,))
        output_path = os.path.join(output_pred_path, doc_name, doc_name)
        read.create_folder(output_path)
        data.to_file(output_path + format_abbre)