def insertResultsLinkDictIntoDB(self, resultsLinkDict, googleSearchQueryObj, printing, printingDebug):
    """Insert every URL of ``resultsLinkDict`` into the results table.

    Nothing happens unless ``self.conn`` is set and ``self.dbTableName`` is
    a string. Pages are visited in sorted key order; each URL is written as
    one row through ``sqliteDefaults.insert_table_sqlite``. A row that fails
    is reported and skipped, so one bad row never aborts the batch.

    Args:
        resultsLinkDict: maps a page number to the ordered sequence of
            result URLs found on that page.
        googleSearchQueryObj: query object providing
            ``getTopicStringForDB()``, ``getDaterangeFrom()``,
            ``getDaterangeTO()`` and ``toString()``.
        printing: when true, progress, per-row failures and the final
            counts are printed.
        printingDebug: when true, extra diagnostics and exception
            tracebacks are printed; also forwarded to the insert helper as
            ``printing_debug``.

    Returns:
        None. Errors derived from ``Exception`` are reported, not raised.
    """
    if self.conn is None or not isinstance(self.dbTableName, str):
        return

    if printing:
        print("\n\t\t\tTrying to insert results obtained...")

    try:
        # Overall rank of a result across all pages; 1 is the top result.
        resultCount = 0
        repeatCount = 0
        uniqueResultNumber = 0

        if printingDebug:
            print("\n\n\nresultsLinkDict=\n%s\n\n" % resultsLinkDict)
            print("sorted(resultsLinkDict.keys()) = %s\n\n\n" % sorted(resultsLinkDict.keys()))

        for pageNum in sorted(resultsLinkDict):
            for resultNumberOnPage, resUrl in enumerate(resultsLinkDict[pageNum], 1):
                # The rank always advances, even for rows that fail to
                # insert, so it reflects the position in the search results.
                resultCount += 1
                # Bound up front so the handler below can always report it,
                # even when the topic getter itself is what failed.
                topic = None
                try:
                    topic = googleSearchQueryObj.getTopicStringForDB()
                    startDate = googleSearchQueryObj.getDaterangeFrom()
                    endDate = googleSearchQueryObj.getDaterangeTO()
                    sqliteDefaults.insert_table_sqlite(
                        self.conn,
                        self.dbTableName,
                        ('resultNumberInSearch', 'URL', 'Topic',
                         'ResultPageNumber', 'ResultNumberOnPage',
                         'StartDate', 'EndDate', 'SearchedOnDate',
                         'ObtainedFromQuery'),
                        [(resultCount, resUrl, topic, pageNum,
                          resultNumberOnPage, startDate, endDate,
                          datetime.datetime.now().date(),
                          googleSearchQueryObj.toString())],
                        printing_debug=printingDebug,
                    )
                    uniqueResultNumber += 1
                except Exception as e:
                    # Any failure whose message is not a syntax error is
                    # counted as a repeated (already stored) URL.
                    if 'syntax error' not in str(e).lower():
                        repeatCount += 1
                    if printing:
                        print("\n\t\t\t\tCould not insert topic-url pair ( %s , %s )" % (topic, resUrl))
                        print("\t\t\t\tError description: %s\n" % e)
                    if printingDebug:
                        # print_exc shows where the exception was raised;
                        # print_stack would only show the current call stack.
                        traceback.print_exc()

        if printing:
            print("\n\t\t\t\tNumber of URLs repeated: (%s/%s)" % (repeatCount, resultCount))
            print("\t\t\t\tNumber of unique URLs inserted: (%s/%s)\n" % (uniqueResultNumber, resultCount))

    except Exception as e:
        if printing:
            print("\t\t\tERROR in GoogleSearch._insertResultsLinkDictIntoDB(): Cannot insert results extracted so far into database.\n")
            print("\n\t\t\tERROR description: %s" % e)
        if printingDebug:
            print("\n\t\t\tPrinting stack traceback:\n")
            traceback.print_exc()
            print("\n\n\n")
# NOTE(review): this call appears to be the tail of an error handler that
# begins before this fragment -- confirm its placement/indentation.
traceback.print_exc()

# Store every URL collected so far, one row per (topic, URL) pair.
try:
    print("\n\n\n\tTrying to insert results obtained so far into...\n")
    repeat_count = 0
    unique_count = 0
    # Overall rank of a result; 1 means the top result.
    result_number = 1
    # Visit pages in ascending order: plain dict iteration order is
    # arbitrary on Python 2 and would assign ranks out of page order.
    for page_num in sorted(results_link_dict):
        for res_url in results_link_dict[page_num]:
            try:
                sqliteDefaults.insert_table_sqlite(
                    conn,
                    db_table_name,
                    ('URL', 'Topic', 'StartDate', 'EndDate',
                     'ResultPageNumber', 'ResultNumber'),
                    [(res_url, topic, start_date, end_date,
                      page_num, result_number)],
                )
                unique_count += 1
            except Exception:
                print("\t\tCould not insert topic-url pair ( %s , %s ), possible duplicate" % (topic, res_url))
                repeat_count += 1
            # The rank always advances, even for duplicates, so it shows
            # where the result sat in the search listing.
            result_number += 1
    print("\n\n\tNumber of URLs repeated in this search = %s" % repeat_count)
    print("\tNumber of URLs extracted in this search = %s" % unique_count)
except Exception:
    print("\n\t\tERROR: Cannot insert results extracted so far into database.\n")
    # Show the cause before terminating instead of exiting silently.
    traceback.print_exc()
    exit()
traceback.print_exc() try: print "\n\n\n\tTrying to insert results obtained so far into...\n" repeat_count = 0 result_number = 1 ## gives priority of results. ResultNumber = 1 means the top result unique_result_number = 1 for page_num in results_link_dict: for res_url in results_link_dict[page_num]: ## Insert the url into the database. try: sqliteDefaults.insert_table_sqlite( conn, db_table_name, ('URL', 'Topic', 'StartDate', 'EndDate', 'ResultPageNumber', 'ResultNumber'), [(res_url, topic, start_date, end_date, page_num, result_number)]) unique_result_number += 1 except Exception: print "\t\tCould not insert topic-url pair ( %s , %s ), possible duplicate" % ( topic, res_url) repeat_count = repeat_count + 1 result_number = result_number + 1 ## We always increment te result number to show where it would be on the page, even in case of duplicates print "\n\n\tNumber of URLs repeated in this search = %s" % ( repeat_count) print "\tNumber of URLs extracted in this search = %s" % ( unique_result_number - 1) except Exception:
temp_list = list(sorted([j for j in company_dict])) for j in temp_list: if j >= 100: print "%s : %s"%(j, company_dict[j]) while True: select = raw_input("\n>") if not select: ensure_not_double_query = "SELECT * from articles_clean where article_url='%s'"%(articles[i][0]) if len(sqliteDefaults.verified_select_sqlite(conn,ensure_not_double_query)) == 0: sqliteDefaults.insert_table_sqlite(conn, 'articles_clean', ('company_or_sector', 'article_url'), [ (articles[i][1], articles[i][0]) ] ) i+=1 ## Go to next entry else: print "ERROR #1: pair already exists in table" y = raw_input() break; elif select.lower() == 'p' or select.lower() == 'b':