def make_kw_approach(filename):
    """Link bacteria to abstract keywords and write the results to CSV files.

    Steps, in order:
      1. Load abstracts via read_abstracts_from_xml("pubmed_result.xml") and
         the bacteria list via read_txt_file(filename).
      2. For every abstract, run the kwe keyword extraction and store the
         result on abstract.key_words.
      3. For every bacteria whose name occurs in the abstract text, bump
         bacteria.Freq and record the abstract id under each keyword in
         bacteria.key_words (keyword -> [abstract_id, ...]).
      4. Build an upper-triangular adjacency matrix whose [i][j] entry
         (j > i) is the number of non-empty keywords shared by bacteria i
         and bacteria j, and hand it to print_adj_matrix_to_csv1.
      5. Write one "<bacteria name>.csv" file per bacteria with one keyword
         per row.

    filename -- path passed to read_txt_file to obtain the bacteria list.

    NOTE(review): this file contains a second definition of
    make_kw_approach further down; that later definition replaces this one
    when the module is loaded.
    """
    # Module-level collection of abstracts; presumably filled in by
    # read_abstracts_from_xml below -- TODO confirm.
    global abstracts

    print("Reading data file...")
    read_abstracts_from_xml("pubmed_result.xml")
    bacteria_list = read_txt_file(filename)

    for abstract in abstracts:
        kwe.make_keyword_candidates(abstract.AbstractText)
        kwe.make_cooccurrence_matrix()
        abstract.key_words = kwe.make_keywords_list()

        # Tie keywords to bacteria: key_word -> [abstract_id, abstract_id, ...]
        for bacteria in bacteria_list:
            if bacteria.name not in abstract.AbstractText:
                continue
            bacteria.Freq += 1
            for keyword in abstract.key_words:
                bacteria.key_words.setdefault(keyword, []).append(abstract.id)

    print("Done with extraction keywords")

    # Adjacency matrix, strong connections: identical keywords only.
    # The previous implementation compared every keyword pair and, for equal
    # pairs, tested a symmetric difference against a set that was always
    # empty (typo: set(list(skww))).  That test therefore only checked that
    # the keyword was non-empty, so the value actually produced was the
    # number of shared non-empty keywords.  The same value is computed here
    # directly with a set intersection.
    # Assumes keywords are strings (or other sized, hashable values).
    keyword_sets = [
        set(kw for kw in bacteria.key_words if kw)
        for bacteria in bacteria_list
    ]
    size = len(bacteria_list)
    adj_matrix = [[0] * size for _ in range(size)]
    for i in range(size):
        # Only the upper triangle (j > i) is filled, as before.
        for j in range(i + 1, size):
            adj_matrix[i][j] = len(keyword_sets[i] & keyword_sets[j])

    print_adj_matrix_to_csv1(bacteria_list, adj_matrix, 'adj_matrix_strong.csv')

    for bacteria in bacteria_list:
        # 'wb' is the mode the Python 2 csv module expects for output files.
        with open(str(bacteria.name) + ".csv", 'wb') as csvfile:
            writer = csv.writer(csvfile)
            for item in bacteria.key_words:
                # writerow takes a sequence of fields; passing a bare string
                # would emit one column per character.
                writer.writerow([str(item)])
def calculate_index_words(text):
    """Run the kwe keyword-extraction steps on text and return the result.

    Calls kwe.make_keyword_candidates(text), then
    kwe.make_cooccurrence_matrix(), and returns whatever
    kwe.make_keywords_list() produces.

    NOTE(review): kwe appears to keep state between these calls (the later
    steps take no arguments), so the call order matters -- confirm.
    """
    extractor = kwe
    extractor.make_keyword_candidates(text)
    extractor.make_cooccurrence_matrix()
    index_words = extractor.make_keywords_list()
    return index_words
def make_kw_approach(filename):
    """Link bacteria to abstract keywords and write the results to CSV files.

    Steps, in order:
      1. Load abstracts via read_abstracts_from_xml("pubmed_result.xml") and
         the bacteria list via read_txt_file(filename).
      2. For every abstract, run the kwe keyword extraction and store the
         result on abstract.key_words.
      3. For every bacteria whose name occurs in the abstract text, bump
         bacteria.Freq and record the abstract id under each keyword in
         bacteria.key_words (keyword -> [abstract_id, ...]).
      4. Build an upper-triangular adjacency matrix whose [i][j] entry
         (j > i) is the number of non-empty keywords shared by bacteria i
         and bacteria j, and hand it to print_adj_matrix_to_csv1.
      5. Write one "<bacteria name>.csv" file per bacteria with one keyword
         per row.

    filename -- path passed to read_txt_file to obtain the bacteria list.

    NOTE(review): an earlier definition of make_kw_approach appears above in
    this file; this later definition is the one that takes effect once the
    module is loaded.
    """
    # Module-level collection of abstracts; presumably filled in by
    # read_abstracts_from_xml below -- TODO confirm.
    global abstracts

    print("Reading data file...")
    read_abstracts_from_xml("pubmed_result.xml")
    bacteria_list = read_txt_file(filename)

    for abstract in abstracts:
        kwe.make_keyword_candidates(abstract.AbstractText)
        kwe.make_cooccurrence_matrix()
        abstract.key_words = kwe.make_keywords_list()

        # Tie keywords to bacteria: key_word -> [abstract_id, abstract_id, ...]
        for bacteria in bacteria_list:
            if bacteria.name not in abstract.AbstractText:
                continue
            bacteria.Freq += 1
            for keyword in abstract.key_words:
                bacteria.key_words.setdefault(keyword, []).append(abstract.id)

    print("Done with extraction keywords")

    # Adjacency matrix, strong connections: identical keywords only.
    # The previous implementation compared every keyword pair and, for equal
    # pairs, tested a symmetric difference against a set that was always
    # empty (typo: set(list(skww))).  That test therefore only checked that
    # the keyword was non-empty, so the value actually produced was the
    # number of shared non-empty keywords.  The same value is computed here
    # directly with a set intersection.
    # Assumes keywords are strings (or other sized, hashable values).
    keyword_sets = [
        set(kw for kw in bacteria.key_words if kw)
        for bacteria in bacteria_list
    ]
    size = len(bacteria_list)
    adj_matrix = [[0] * size for _ in range(size)]
    for i in range(size):
        # Only the upper triangle (j > i) is filled, as before.
        for j in range(i + 1, size):
            adj_matrix[i][j] = len(keyword_sets[i] & keyword_sets[j])

    print_adj_matrix_to_csv1(bacteria_list, adj_matrix, 'adj_matrix_strong.csv')

    for bacteria in bacteria_list:
        # 'wb' is the mode the Python 2 csv module expects for output files.
        with open(str(bacteria.name) + ".csv", 'wb') as csvfile:
            writer = csv.writer(csvfile)
            for item in bacteria.key_words:
                # writerow takes a sequence of fields; passing a bare string
                # would emit one column per character.
                writer.writerow([str(item)])