예제 #1
0
def reg_bussvc(list_line, n_token, command):
    """Build one '0'/'1' feature string per record for the bus-service patterns.

    ``list_line`` is walked in groups of three lines. Only the first line of
    each group is used: it is lower-cased, stripped and split on tabs into
    tokens. For every token one character is emitted:

    * '1' if ``pattern_token_bussvc(token, command)`` returns exactly ``True``;
    * otherwise '1' if ``pattern_tokenText_bussvc(token, text, command)`` is
      truthy, where ``text`` is the space-joined tokens at positions
      ``range_k[1]`` .. ``range_k[2]`` (inclusive) and ``range_k`` is the
      result of ``range_text_index(k, len(tokens), n_token)``;
    * otherwise '0'.

    Args:
        list_line: sequence of text lines, three lines per record.
        n_token: forwarded to ``range_text_index`` (presumably the context
            window size -- confirm against that helper).
        command: forwarded unchanged to both pattern helpers.

    Returns:
        A list with one feature string per record; each string has one
        character per token of the record's first line.

    Side effects:
        Prints the zero-based record index after each record is processed.
    """
    list_ftr = []
    for i in range(0, len(list_line), 3):
        # Only the token line is needed here. The label line (i + 1) is
        # deliberately not read, so a trailing incomplete group cannot raise.
        tokens = list_line[i].lower().strip().split('\t')

        flags = []
        for k, raw_token in enumerate(tokens):
            token = raw_token.strip()
            if pattern_token_bussvc(token, command) is True:
                flags.append('1')
                continue

            # Fall back to matching the token together with its neighbours.
            range_k = range_text_index(k, len(tokens), n_token)
            token_text = ' '.join(
                [tokens[m] for m in range(range_k[1], range_k[2] + 1)]
            ).strip()

            if pattern_tokenText_bussvc(token, token_text, command):
                flags.append('1')
            else:
                flags.append('0')

        # Progress indicator; `//` keeps the integer output on Python 2 and 3.
        print(i // 3)
        list_ftr.append(''.join(flags))
    return list_ftr
예제 #2
0
def extract_ftr_gt_road_busstop(list_line, command, n_token):
    """Print ground-truth feature rows for 'road' or 'busstop' tokens.

    ``list_line`` is walked in groups of three lines: the first line of a
    group holds tab-separated tokens, the second holds one tab-separated
    integer label per token, and the third is ignored.

    A token produces a row when its label equals the command's label value
    (2 for 'road', 3 for 'busstop') or when its stripped, lower-cased form is
    in the dictionary returned by ``load_dict(command)``. Each row is

        <record index> TAB <TRUE|FALSE> TAB <lower-cased token + ' '> TAB <text>

    where TRUE/FALSE says whether the label matched, and ``text`` is the
    space-joined tokens at positions ``range_k[1]`` .. ``range_k[2]``
    (inclusive), with ``range_k = range_text_index(k, n_labels, n_token)``.

    Args:
        list_line: sequence of text lines, three lines per record.
        command: 'road' or 'busstop'. Any other value prints
            'Give the correct command' and exits via ``quit()``.
        n_token: forwarded to ``range_text_index`` (presumably the context
            window size -- confirm against that helper).

    Returns:
        None. All rows, followed by the row count, are printed to stdout.

    Raises:
        ValueError: if a label cannot be parsed as an integer.
        IndexError: if a record has no label line or fewer tokens than labels.
    """
    list_dict = load_dict(command)

    # The target label depends only on the command, so resolve and validate
    # it once instead of once per token.
    label = {'road': 2, 'busstop': 3}.get(command, 0)
    if label == 0:  # quit if we don't have the correct command
        print('Give the correct command')
        quit()

    list_ftr = []
    cnt = 0
    for i in range(0, len(list_line), 3):
        split_first = list_line[i].strip().split('\t')
        split_second = list_line[i + 1].strip().split('\t')

        for k in range(len(split_second)):
            # Convert the label text to int *before* comparing; comparing the
            # raw string with an int is always False and would hide every
            # labelled token that is not also in the dictionary.
            is_labelled = int(split_second[k]) == label
            if not (is_labelled or split_first[k].strip().lower() in list_dict):
                continue

            word = split_first[k].lower() + ' '  # trailing space is part of the row format
            range_k = range_text_index(k, len(split_second), n_token)
            ftr_text = ' '.join(
                [split_first[m] for m in range(range_k[1], range_k[2] + 1)]
            ).strip()

            truth = 'TRUE' if is_labelled else 'FALSE'
            list_ftr.append('\t'.join([str(cnt), truth, word, ftr_text]))
        cnt += 1

    for value in list_ftr:
        print(value)
    print('Length of list features is: %i' % len(list_ftr))
예제 #3
0
def extract_ftr_gt_svc(list_line, load_svc, n_token):
    """Print ground-truth feature rows for tokens found in ``load_svc``.

    ``list_line`` is consumed three lines at a time: the first line of each
    group is split on tabs into tokens, the second into per-token integer
    labels, and the third is not read.

    For every token that is a member of ``load_svc`` a row

        <record index> TAB <TRUE|FALSE> TAB <lower-cased token + ' '> TAB <text>

    is collected: TRUE when the token's label is 1, FALSE when it is 0, and
    no row at all for any other label. ``text`` is the space-joined tokens at
    positions ``window[1]`` .. ``window[2]`` (inclusive), with
    ``window = range_text_index(position, n_labels, n_token)``.

    Independently of membership, every label equal to 1 is counted.

    Args:
        list_line: sequence of text lines, three lines per record.
        load_svc: container tested with ``in`` against the raw token.
        n_token: forwarded to ``range_text_index``.

    Returns:
        None. The rows, the number of rows and the number of labels equal to
        1 are printed to stdout.
    """
    rows = []
    true_label_total = 0
    record_no = 0

    for start in range(0, len(list_line), 3):
        tokens = list_line[start].strip().split('\t')
        labels = list_line[start + 1].strip().split('\t')
        n_labels = len(labels)

        for pos in range(n_labels):
            word = tokens[pos]
            if word in load_svc:  # token is a known bus service
                svc = word.lower() + ' '  # trailing space is part of the row format
                window = range_text_index(pos, n_labels, n_token)
                context = ' '.join(
                    [tokens[m] for m in range(window[1], window[2] + 1)]
                ).strip()

                flag = int(labels[pos])
                if flag == 1:
                    rows.append('\t'.join((str(record_no), 'TRUE', svc, context)))
                elif flag == 0:
                    rows.append('\t'.join((str(record_no), 'FALSE', svc, context)))

            # Counted for every token, whether or not it is in load_svc.
            if int(labels[pos]) == 1:
                true_label_total += 1
        record_no += 1

    for row in rows:
        print(row)
    print('Length of list features is: %i' % len(rows))
    print('Length of bus service labeling TRUE is: %i' % true_label_total)