Code example #1
from nltk.parse.corenlp import CoreNLPDependencyParser


def main_test3():
    parser = CoreNLPDependencyParser(url='http://localhost:9001')
    # parser = CoreNLPParser('http://localhost:9001')
    # print(list(parser.tokenize(u'中国人为什么不能选择安乐死?')))
    # ['中国人', '为什么', '不', '能', '选择', '安乐死', '?']
    # title = '中国人为什么不能选择安乐死?'
    title = '我支持我觉得安乐死挺好的。'

    # tokenize() yields plain token strings, so print each token directly
    for word in parser.tokenize(title):
        print(word)
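A minimal sketch of getting a full dependency parse for the same kind of title (assuming a Stanford CoreNLP server with the Chinese models is running on port 9001; triples() is the standard nltk DependencyGraph accessor):

from nltk.parse.corenlp import CoreNLPDependencyParser

parser = CoreNLPDependencyParser(url='http://localhost:9001')
parse, = parser.raw_parse('我支持我觉得安乐死挺好的。')
# each triple is ((head_word, head_tag), relation, (dependent_word, dependent_tag))
for governor, relation, dependent in parse.triples():
    print(governor, relation, dependent)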
Code example #2
def analyze(stext):
    parser = CoreNLPDependencyParser(url="http://localhost:9000")

    # replace CRLF with two spaces of equal length so character offsets stay aligned
    if '\r\n' in stext:
        stext = stext.replace('\r\n', '  ')
    iterator = parser.raw_parse(stext)
    parse = next(iterator)

    parse = add_offset_to_tree(parse, stext)

    return parse
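A minimal sketch of inspecting the DependencyGraph that raw_parse returns (assuming a CoreNLP server on port 9000; add_offset_to_tree is this project's own helper and is not needed for the sketch, and the sentence is just a toy example):

from nltk.parse.corenlp import CoreNLPDependencyParser

parser = CoreNLPDependencyParser(url="http://localhost:9000")
parse = next(parser.raw_parse("Aspirin interacts with warfarin."))
# nodes maps token index -> node dict; index 0 is the artificial root
for idx, node in sorted(parse.nodes.items()):
    if node['word'] is not None:
        print(idx, node['word'], node['tag'], node['head'], node['rel'])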
Code example #3
def main():
    parser = CoreNLPDependencyParser(url='http://localhost:9001')
    title = '中国人为什么不能选择安乐死?'
    # parse, = parser.raw_parse(title)
    title_sub = find_NN(title, parser)
    # tokenize
    # title_token = list(parser.tokenize(title))
    # print(title_token)

    # parse danmaku and extract subjects
    # using windows : surrounding polarity
    data = pd.read_csv('demo.csv')
    # danmaku_list = data.loc[:, 'Barrages_original']

    # get index of 1. INITIAL 2. middle 3. end
    # for now: the same padding;
    initial_time = 10.0  # initial time window: the first 10 seconds
    start_idx = 0
    for showing_time in data['Showing_time']:
        if showing_time < initial_time:
            start_idx += 1

    # slice danmaku list
    token = 2
    initial_dan = data.loc[:start_idx, 'Barrages_original']
    ini_subj = padding_initial(title_sub, initial_dan, parser, token)

    last_data = data[start_idx + 1:]
    # subjects = []

    # padding danmaku
    # processing padding on different timezone
    padding_res = process_padding(last_data, parser, title_sub, token, data)
    last_subjects = padding_res[0]
    subjects = ini_subj + last_subjects
    pprint(subjects)
    print(f'length of subjects: {len(subjects)}')
    sur_words = padding_res[1]

    # polarity: use the surrounding words to decide the sentiment of each danmaku
    po_list = []
    for sur in sur_words:
        po = return_polarity(sur)
        print(f'{sur}: {po}')
        po_list.append(po)
    print(len(po_list))
Code example #4
def main():
    parser = CoreNLPDependencyParser(url='http://localhost:9001')
    title = '中国人为什么不能选择安乐死?'
    # parse, = parser.raw_parse(title)
    title_sub = find_NN(title, parser)

    # parse danmaku and extract subjects
    # using windows : surrounding polarity
    data = pd.read_csv('demo.csv')

    # get index of 1. INITIAL 2. middle 3. end
    # for now: the same padding;
    initial_time = 10.0  # initial time window: the first 10 seconds
    start_idx = 0
    for showing_time in data['Showing_time']:
        if showing_time < initial_time:
            start_idx += 1

    # slice danmaku list: initial + last, get subjects:list of lists
    initial_dan = data.loc[:start_idx, 'Barrages_original']
    ini_subj = padding_initial(title_sub, initial_dan, parser)
    last_data = data[start_idx + 1:]
    # padding danmaku
    # processing padding on different timezone
    last_subjects = process_padding_last(last_data, parser, title_sub, data)
    subjects = ini_subj + last_subjects

    # mapping and filter
    pair_idxs = map_subjects(subjects)

    # enumerate all of the danmaku and output pairwise opinion distances
    row_length = data.shape[0]  # how many rows
    m = np.array([range(row_length)])
    distance.cdist(
        m, m, lambda u, v: op_distance(u,
                                       v,
                                       data=data,
                                       parser=parser,
                                       subjects=subjects,
                                       pair_idxs=pair_idxs))
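The call above relies on scipy's cdist accepting a Python callable as the metric; note that cdist treats each row of its inputs as one observation. A minimal sketch of that pattern with a toy metric (not the project's op_distance), where each danmaku index is wrapped as a 1-D point so the callable receives two one-element arrays:

import numpy as np
from scipy.spatial import distance

idx = np.arange(5).reshape(-1, 1)   # shape (5, 1): one row per danmaku index
# the callable gets two rows u, v and must return a single float
pairwise = distance.cdist(idx, idx, lambda u, v: abs(u[0] - v[0]))
print(pairwise)                     # 5 x 5 matrix of absolute index differences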
Code example #5
def split_offset(offset):
    if ';' in offset:
        offset = offset.split(";")
    else:
        offset = [offset]
    ent_offset = []
    for off in offset:
        ent_offset.append(tuple([int(i) for i in off.split("-")]))

    return np.asarray(ent_offset)


if __name__ == '__main__':
    input_directory = '../data/Devel/'
    parser = CoreNLPDependencyParser(url="http://localhost:9000")

    sentences = {}
    all_entities = {}
    all_roots = {}
    all_parse = {}

    # Process each file in the directory
    filenames = os.listdir(input_directory)
    for index_file, filename in enumerate(filenames):
        # Parse XML file
        root = parse_xml(input_directory + filename)
        print(f" - File: {filename} ({index_file + 1} out of {len(filenames)})")

        for child in root:
            sid, text = get_sentence_info(child)
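For reference, split_offset turns an entity offset string such as "115-119" or "115-119;125-131" into an array of (start, end) pairs; a short usage sketch with hypothetical offsets:

import numpy as np  # required by split_offset

print(split_offset("115-119"))          # -> [[115 119]]
print(split_offset("115-119;125-131"))  # -> [[115 119]
                                        #     [125 131]]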
Code example #6
def main():
    parser = CoreNLPDependencyParser(url='http://localhost:9001')
    title = '中国人为什么不能选择安乐死?'
    # parse, = parser.raw_parse(title)
    title_sub = find_NN(title, parser)

    # parse danmaku and extract subjects
    # using windows : surrounding polarity
    data = pd.read_csv('../temp/demo.csv')

    # get index of 1. INITIAL 2. middle 3. end
    # for now: the same padding;
    initial_time = 10.0  # initial time window: the first 10 seconds
    start_idx = 0
    for showing_time in data['Showing_time']:
        if showing_time < initial_time:
            start_idx += 1

    # slice danmaku list: initial + last, get subjects:list of lists
    initial_dan = data.loc[:start_idx, 'Barrages_original']
    ini_subj = padding_initial(title_sub, initial_dan, parser)
    last_data = data[start_idx + 1:]
    # padding danmaku
    # processing padding on different timezone
    last_subjects = process_padding_last(last_data, parser, title_sub, data)
    subjects = ini_subj + last_subjects
    pair_idxs = map_subjects(subjects)

    # mapping sentence index
    map_sens = mappingsen(pair_idxs)

    # enumerate all of the danmaku and output pairwise opinion distances
    row_length = data.shape[0]  # how many rows
    count_zero_dis = 0  # pairs with no mapped subjects (distance written as 0.0)
    count_zero_dis1 = 0  # pairs with mapped subjects (op_distance computed)

    # save res into table:
    # first sentence; second sentence; opinion distance
    sentence_0_list = []
    sentence_1_list = []
    op_dis_list = []
    df = pd.DataFrame(columns=('first sentence', 'second sentence',
                               'op_distance'))
    with open('distance.txt', 'w') as f:
        import time
        start_t = time.time()
        for m in range(row_length):
            n = m + 1
            while n < row_length:
                if (m, n) in map_sens:
                    sentence_0_list.append(m)
                    sentence_1_list.append(n)
                    op_d = op_distance(subjects, m, n, data, parser, pair_idxs)
                    op_dis_list.append(op_d)
                    count_zero_dis1 += 1
                    f.write(
                        f'the distance between danmaku {m} and danmaku {n} is: {op_d}\n'
                    )

                else:
                    sentence_0_list.append(m)
                    sentence_1_list.append(n)
                    f.write(
                        f'the distance between danmaku {m} and danmaku {n} is: 0.0 \n'
                    )
                    op_dis_list.append(0.0)
                    count_zero_dis += 1

                n += 1
        end_t = time.time()
        print(f'it took {end_t - start_t:.2f}s to process {row_length} danmakus')
        print(f'pairs with no mapped subjects (written as 0.0): {count_zero_dis}')
        print(f'pairs with a computed distance: {count_zero_dis1}')
        print(len(op_dis_list))
    df['first sentence'] = sentence_0_list
    df['second sentence'] = sentence_1_list
    df['op_distance'] = op_dis_list
    df.to_csv('distance_table.csv', index=False)
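A minimal sketch of turning the pairwise table written above back into a symmetric danmaku-by-danmaku distance matrix (assuming distance_table.csv exactly as produced by this script):

import numpy as np
import pandas as pd

pairs = pd.read_csv('distance_table.csv')
n = int(pairs[['first sentence', 'second sentence']].to_numpy().max()) + 1
dist = np.zeros((n, n))
dist[pairs['first sentence'].to_numpy(),
     pairs['second sentence'].to_numpy()] = pairs['op_distance'].to_numpy()
dist += dist.T  # the table only stores pairs with m < n (upper triangle)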
Code example #7
File: app.py  Project: NekoYIQI/RNCRFSentimenter
    # (excerpt from a loop over a word-vector file: skip the header line,
    # then store each word's embedding in dic)
    if index == 0:
        index += 1
        continue
    index += 1
    word_vector = line.split(" ")
    word = word_vector[0]

    vector_list = []
    for element in word_vector[1:]:
        vector_list.append(float(element))

    vector = np.asarray(vector_list)
    dic[word] = vector

# connect to coreNLP server
dep_parser = CoreNLPDependencyParser(url='http://localhost:9000')


def generate_result(sents, predictions):
    COLOR = ['orange', 'violet']
    sentences = []
    # print(sents)
    # print(predictions)
    for i in range(len(sents)):
        print(predictions[i])
        sentence = ""
        for j in range(len(sents[i])):
            if predictions[i][j] == '0':
                sentence += sents[i][j]
                sentence += " "
            elif predictions[i][j] == '1' or predictions[i][j] == '2':
Code example #8
from nltk import CoreNLPDependencyParser

dep_parser = CoreNLPDependencyParser(url='http://localhost:9000')

# dependency-parse each input line and write the 4-column CoNLL output,
# one blank-line-separated block per sentence
with open('./data_semEval/laptop/laptop_test.txt', 'r') as in_file, \
        open('./data_semEval/laptop/raw_parse_laptop_test', 'w') as out_file:
    for line in in_file:
        parse, = dep_parser.raw_parse(line)
        out_file.write(parse.to_conll(4))
        out_file.write('\n')
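All of these examples assume a Stanford CoreNLP server is already running on the given port (started from the CoreNLP distribution via the edu.stanford.nlp.pipeline.StanfordCoreNLPServer class). As a minimal sketch, the CoNLL file written above can be read back into nltk DependencyGraph objects (assuming the output path used in the script):

from nltk.parse.dependencygraph import DependencyGraph

with open('./data_semEval/laptop/raw_parse_laptop_test') as f:
    blocks = [b for b in f.read().split('\n\n') if b.strip()]

# to_conll(4) blocks are word/tag/head/rel, which DependencyGraph parses directly
graphs = [DependencyGraph(block) for block in blocks]
print(graphs[0].tree())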