def trans(src, dst, index, label_index, mode='w', sep='/'):
    """Convert a tagged text corpus into binary chain-graph records.

    Every line of ``src`` is split on single spaces and each token is split
    at its last ``sep`` into a text part and a tag part.  The text parts are
    joined into one sequence, feature ids are looked up for every position
    of that sequence, and one node per ``(label, features)`` pair is written
    to ``dst`` via ``json_to_binary.graph_to_file``.

    Args:
        src: Path of the UTF-8 input file, one sentence per line.
        dst: Path of the binary output file (opened with ``'wb'``).
        index: First argument of the ``indexer.Indexer`` used for features.
        label_index: First argument of the ``indexer.Indexer`` used for
            labels.
        mode: Second argument of both ``indexer.Indexer`` constructors.
        sep: Separator between text and tag inside a token.  With ``' '``
            the tokens are taken whole and every tag is the empty string.
    """
    lid = indexer.Indexer(label_index, mode)
    inder = indexer.Indexer(index, mode)

    # Context managers close both files even when a line fails to convert;
    # the output is opened first, exactly as before.
    with open(dst, 'wb') as out, open(src, encoding='utf8') as text:
        for ln, raw in enumerate(text, 1):
            wts = [tok.rpartition(sep) for tok in raw.strip().split(' ')]
            if sep == ' ':
                # Tokens cannot contain the separator, so rpartition leaves
                # the whole token in the last slot.
                tags = ['' for _ in wts]
                words = [parts[-1] for parts in wts]
            else:
                tags = [parts[-1] for parts in wts]
                words = [parts[0] for parts in wts]
            seq = ''.join(words)

            # Keep only non-negative ids (presumably a negative id marks a
            # key the indexer does not know -- confirm against Indexer).
            # Built as real lists: in Python 3 a bare filter() is a one-shot
            # iterator without len() or indexing.
            fs = [
                [fid for fid in [inder(k) for k in gen_keys(seq, pos)]
                 if fid >= 0]
                for pos in range(len(seq))
            ]

            # Node layout: [flag, predecessor indices, label, feature ids].
            graph = [[0, [], c, v]
                     for c, v in zip(_to_tags(tags, words, lid), fs)]
            if not graph:
                continue
            graph[0][0] += 1   # first node of the chain
            graph[-1][0] += 2  # last node (a single node ends up with 3)
            for i in range(1, len(graph)):
                graph[i][1] = [i - 1]

            json_to_binary.graph_to_file(graph, out)

            # Progress report every 1000 input lines.
            if ln % 1000 == 0:
                print(ln)

    print(len(inder))

    print('the end')
def trans(src, dst, index, label_index, mode='w', sep='/'):
    """Translate a ``word<sep>tag`` corpus into binary graph records.

    For each line of ``src`` the space-separated tokens are split at their
    last ``sep``; the text parts are concatenated, feature ids are computed
    for each position of the concatenation, and a linear chain of nodes is
    handed to ``json_to_binary.graph_to_file`` together with the open
    output file.

    Args:
        src: Path of the UTF-8 text file to read.
        dst: Path of the binary file to write (opened with ``'wb'``).
        index: First argument of the feature ``indexer.Indexer``.
        label_index: First argument of the label ``indexer.Indexer``.
        mode: Second argument of both ``indexer.Indexer`` constructors.
        sep: Text/tag separator.  When it is ``' '`` all tags are ``''`` and
            each token is used unchanged.
    """
    lid = indexer.Indexer(label_index, mode)
    inder = indexer.Indexer(index, mode)

    # ``with`` guarantees that both handles are released on any exit path.
    with open(dst, 'wb') as sink:
        with open(src, encoding='utf8') as source:
            ln = 0
            for raw_line in source:
                ln += 1

                wts = [x.rpartition(sep) for x in raw_line.strip().split(' ')]
                tag_slot = -1
                # With sep == ' ' rpartition finds nothing, so the token
                # itself sits in the last slot and there is no tag.
                text_slot = -1 if sep == ' ' else 0
                tags = ['' if sep == ' ' else x[tag_slot] for x in wts]
                pieces = [x[text_slot] for x in wts]
                seq = ''.join(pieces)

                fs = []
                for pos in range(len(seq)):
                    ids = [inder(k) for k in gen_keys(seq, pos)]
                    # Materialised as a list (Python 3 filter() is lazy);
                    # negative ids are dropped.
                    fs.append([i for i in ids if i >= 0])

                # Each node is [flag, predecessors, label, feature ids].
                graph = []
                for c, v in zip(_to_tags(tags, pieces, lid), fs):
                    graph.append([0, [], c, v])
                if not graph:
                    continue
                graph[0][0] += 1   # flag the first node
                graph[-1][0] += 2  # flag the last node
                for i in range(1, len(graph)):
                    graph[i][1] = [i - 1]  # link to the previous node

                json_to_binary.graph_to_file(graph, sink)

                # Progress report every 1000 input lines.
                if ln % 1000 == 0:
                    print(ln)

    print(len(inder))

    print('the end')
# Example #3
# 0
def trans(src, dst, index, label_index, mode="w", sep="/", dictionary=None):
    """Convert a tagged corpus into binary chain graphs, with optional
    dictionary features.

    Each line of ``src`` is split on single spaces and every token is split
    at its last ``sep`` into text and tag.  Feature ids are collected for
    every position of the concatenated text, optionally post-processed by a
    ``DictFeature`` instance, filtered to non-negative ids, and written as
    one graph per line through ``json_to_binary.graph_to_file``.

    Args:
        src: Path of the UTF-8 input file.
        dst: Path of the binary output file (opened with ``"wb"``).
        index: First argument of the feature ``indexer.Indexer``.
        label_index: First argument of the label ``indexer.Indexer``.
        mode: Second argument of both ``indexer.Indexer`` constructors.
        sep: Text/tag separator; ``" "`` means untagged input (all tags
            become ``""``).
        dictionary: When truthy, passed to ``DictFeature``; the resulting
            object is called as ``dict_feature(seq, inder, fs)`` for every
            line (presumably extending ``fs`` in place -- its return value
            is not used).
    """
    lid = indexer.Indexer(label_index, mode)
    inder = indexer.Indexer(index, mode)
    # Always bind the name so the per-line check below is explicit.
    dict_feature = DictFeature(dictionary) if dictionary else None

    # Context managers close both files even if conversion raises midway.
    with open(dst, "wb") as out, open(src, encoding="utf8") as text:
        for ln, raw in enumerate(text, 1):
            wts = [tok.rpartition(sep) for tok in raw.strip().split(" ")]
            if sep == " ":
                # No separator can occur inside a token: use it whole.
                tags = ["" for _ in wts]
                words = [parts[-1] for parts in wts]
            else:
                tags = [parts[-1] for parts in wts]
                words = [parts[0] for parts in wts]
            seq = "".join(words)

            fs = [[inder(k) for k in gen_keys(seq, pos)] for pos in range(len(seq))]
            if dict_feature is not None:
                dict_feature(seq, inder, fs)
            # Drop negative ids only after the dictionary step has run.
            fs = [[fid for fid in fv if fid >= 0] for fv in fs]

            # Node layout: [flag, predecessor indices, label, feature ids].
            graph = [[0, [], c, v] for c, v in zip(_to_tags(tags, words, lid), fs)]
            if not graph:
                continue
            graph[0][0] += 1  # first node of the chain
            graph[-1][0] += 2  # last node of the chain
            for i in range(1, len(graph)):
                graph[i][1] = [i - 1]

            json_to_binary.graph_to_file(graph, out)

            # Progress report every 1000 input lines.
            if ln % 1000 == 0:
                print(ln)

    print(len(inder))

    print("the end")
# Example #4
# 0
def test(index, src, dst):
    """Convert an unlabelled text file into binary chain-graph records.

    Each line of ``src`` is split on whitespace; the pieces are joined into
    one sequence, feature ids are looked up with a read-mode
    ``indexer.Indexer`` and one graph per non-empty line is written to
    ``dst`` through ``json_to_binary.graph_to_file``.

    Args:
        index: First argument of ``indexer.Indexer`` (opened with ``'r'``).
        src: Path of the UTF-8 input file.
        dst: Path of the binary output file (opened with ``'wb'``).
    """
    inder = indexer.Indexer(index, 'r')

    # ``with`` releases both handles even if a line fails to convert.
    with open(dst, 'wb') as out, open(src, encoding='utf8') as text:
        for raw in text:
            words = raw.split()
            seq = ''.join(words)

            # Real lists instead of lazy Python 3 filter() objects; only
            # non-negative ids are kept.
            fs = [
                [fid for fid in [inder(k) for k in gen_keys(seq, pos)]
                 if fid >= 0]
                for pos in range(len(seq))
            ]

            # NOTE(review): the trans() functions call
            # _to_tags(tags, line, lid); confirm the helper still accepts
            # this one-argument form.
            graph = [[0, [], c, v] for c, v in zip(_to_tags(words), fs)]
            if not graph:
                continue
            graph[0][0] += 1   # flag the first node
            graph[-1][0] += 2  # flag the last node
            for i in range(1, len(graph)):
                graph[i][1] = [i - 1]  # link to the previous node
            json_to_binary.graph_to_file(graph, out)

    print('the end')
# Example #5
# 0
def trans(src, dst, index, label_index, mode='w', sep='/', dictionary=None):
    """Write one binary chain graph per corpus line, optionally enriched
    with dictionary features.

    Tokens of each ``src`` line (split on single spaces) are divided at
    their last ``sep`` into text and tag.  For every position of the joined
    text the ids of ``gen_keys(seq, pos)`` are looked up, an optional
    ``DictFeature`` callable gets a chance to touch the id lists, negative
    ids are removed, and the chain is serialised with
    ``json_to_binary.graph_to_file``.

    Args:
        src: Path of the UTF-8 input file.
        dst: Path of the binary output file (opened with ``'wb'``).
        index: First argument of the feature ``indexer.Indexer``.
        label_index: First argument of the label ``indexer.Indexer``.
        mode: Second argument of both ``indexer.Indexer`` constructors.
        sep: Text/tag separator; with ``' '`` tokens are used whole and all
            tags are ``''``.
        dictionary: When truthy, wrapped in ``DictFeature`` and invoked as
            ``dict_feature(seq, inder, fs)`` per line (presumably mutating
            ``fs`` -- the call's result is discarded).
    """
    lid = indexer.Indexer(label_index, mode)
    inder = indexer.Indexer(index, mode)
    dict_feature = None
    if dictionary:
        dict_feature = DictFeature(dictionary)

    # Both files are managed by ``with`` so they are closed on every exit
    # path, including exceptions raised while converting a line.
    with open(dst, 'wb') as sink:
        with open(src, encoding='utf8') as source:
            ln = 0
            for raw_line in source:
                ln += 1
                wts = [x.rpartition(sep) for x in raw_line.strip().split(' ')]
                if sep == ' ':
                    tags = ['' for _ in wts]
                    pieces = [x[-1] for x in wts]
                else:
                    tags = [x[-1] for x in wts]
                    pieces = [x[0] for x in wts]
                seq = ''.join(pieces)

                fs = []
                for pos in range(len(seq)):
                    fs.append([inder(k) for k in gen_keys(seq, pos)])
                if dict_feature is not None:
                    dict_feature(seq, inder, fs)
                # Negative ids are removed after the dictionary step.
                fs = [[i for i in fv if i >= 0] for fv in fs]

                # Each node is [flag, predecessors, label, feature ids].
                graph = []
                for c, v in zip(_to_tags(tags, pieces, lid), fs):
                    graph.append([0, [], c, v])
                if not graph:
                    continue
                graph[0][0] += 1   # flag the first node
                graph[-1][0] += 2  # flag the last node
                for i in range(1, len(graph)):
                    graph[i][1] = [i - 1]  # link to the previous node

                json_to_binary.graph_to_file(graph, sink)

                # Progress report every 1000 input lines.
                if ln % 1000 == 0:
                    print(ln)

    print(len(inder))

    print('the end')