if StopFrags.isStopFrag(stem): continue if not stem in frags: frags[stem]=[l, [],[]] # append position to list appointed by sel sel=((end-l)/ld)>0 frags[stem][1+sel].append(end-l-(sel*ld)) if saver: saver.addDocs(D1,D2) return [(saver.save(D1,D2,bulksaver.lcsPkt(sorted(a),sorted(b),l,stem,D1,D2)) if saver else (l,stem,sorted(a),sorted(b))) for stem, (l, a, b) in frags.items() if a and b] if __name__ == "__main__": #from lenx.view.Eurlex import Doc from lenx.view.doc import Doc import pprint import sys #frag=LCS(doc1,doc2) #pprint.pprint(frag.root) d1=Doc(docid=sys.argv[1].strip('\t\n')) pprint.pprint(d1.extractMetadata()) d2=Doc(docid=sys.argv[2].strip('\t\n')) pprint.pprint(d2.extractMetadata()) #pips=pippi(Doc(docid=sys.argv[1].strip('\t\n')),Doc(docid=sys.argv[2].strip('\t\n'))) #print len(pips) #pprint.pprint(pips)