"<" + str(id) + "," + str(int(matches.groups()[1]) + 1) + "," + str(in_title[term.lower()]) + ">", ) except Exception, e: print "80:", e return inverted_index.replace( matches.groups()[0], "<" + str(id) + "," + str(int(matches.groups()[1]) + 1) + ",0>" ) else: try: return uummuuWord.add_to_index(inverted_index, id, 1, in_title[term.lower()]) except Exception, e: print "86:", e return uummuuWord.add_to_index(inverted_index, id, 1, 0) else: matches = re.search("(<" + str(id) + ",(\d)*,([0|1])>)", inverted_index) if len(matches.groups()) == 0: return inverted_index else: if int(matches.groups()[1]) > 1: try: return inverted_index.replace( matches.groups()[0], "<" + str(id)
styled_objs, no_style_objs = main_style_parse(parser.styled_objects, parser.full_style, True)
# Print every key of styled_objs together with its value.
for key in styled_objs.keys():
    print(key + ' ==> ' + str(styled_objs[key]))


def insert_into_db(cursor, doc_id, word, weight, freq, in_title):
    """Record one word occurrence for a document in the index tables.

    Apostrophes are stripped from ``word``; if nothing is left the call is a
    no-op.  The word is then capped at 124 characters, its current
    ``invert_list`` is read from ``sites_index``, merged with this document
    through ``add_to_index`` (defined elsewhere in this file), and written
    back with an INSERT (word not yet present) or an UPDATE (word present).
    Finally a row is added to ``sites_worddoc``; a failure of that last
    statement is printed and swallowed rather than raised.

    Args:
        cursor: database cursor providing ``execute`` and ``fetchall``.
        doc_id: document identifier, formatted with ``%d``.
        word: the term to index.
        weight: weight of the term, formatted with ``%d``.
        freq: occurrence count of the term, formatted with ``%d``.
        in_title: title flag, formatted with ``%d``.

    Returns:
        None.
    """
    word = word.replace("'", "")
    if not word:
        return

    # Cap the length BEFORE the lookup.  The rows are stored under the capped
    # word, so looking up the uncapped form would never match and an
    # over-long word would be re-inserted on every call instead of updated.
    # NOTE(review): 124 is presumably the width of the `word` column --
    # confirm against the schema.
    if len(word) > 124:
        word = word[0:124]

    # NOTE(review): every statement below builds SQL by string interpolation.
    # Only apostrophes are removed from `word`; switch to the driver's
    # parameter binding once its paramstyle is confirmed.
    cursor.execute("SELECT invert_list FROM sites_index WHERE word = '%s';" % word)
    rows = cursor.fetchall()
    if rows:
        entry = rows[0][0]
        present = True
    else:
        # No row for this word yet: start from an empty inverted list.
        entry = ""
        present = False

    entry = add_to_index(entry, doc_id, weight, freq, in_title)

    if not present:
        cursor.execute("INSERT into sites_index(word, invert_list, freq, length, num_docs) VALUES ('%s', '%s', %d, %d, 1);" % (word, entry, freq, len(word)))
    else:
        cursor.execute("UPDATE sites_index set invert_list = '%s', num_docs = num_docs + 1, freq = freq + %d where word = '%s';" % (entry, freq, word))

    try:
        cursor.execute("INSERT INTO sites_worddoc(word, doc_id, occurrence, weight, in_title) VALUES ('%s', %d, %d, %d, %d);" % (word, doc_id, freq, weight, in_title))
    except Exception as e:
        # Best effort: report the failing word and carry on with indexing.
        print("word was: %s length is: %d" % (word, len(word)))
        print("I quit because: %s" % (e,))