def obtainURLsFromDB(cls, table=Business):
    """Obtain unscanned URLs from the business table.

    Queries the 'bizsearch' database for US rows in *table* (default:
    Business) and derives the domain of each URL.

    Returns:
        (url_list, domain_list): parallel lists of the fetched URLs and
        their extracted domains.
    """
    with open_db(dbname='bizsearch') as bizdb:
        # fetch_by(conn, columns, 0, where) -- the 0 presumably selects
        # unscanned rows; TODO confirm against the fetch_by helper's API.
        dbitems = table.fetch_by(bizdb, ['url', 'country'], 0, "country='US'")
        url_list = [url for url, _ in dbitems]
        domain_list = [domain_from_url(u) for u in url_list]
        # Single-argument print() behaves identically on Python 2 and 3,
        # unlike the Python-2-only `print x` statement form.
        print(url_list)
        print(domain_list)
        return url_list, domain_list
def __init__(self, input='db', output='db', dbname='bizsearch', dbpwd=None, *args, **kwargs):
    """Initialize the spider's output sink.

    Args:
        input: input mode selector (currently unused here).
        output: 'db' to write results to the database, anything else to
            write a tab-separated file 'results.list'.
        dbname: database to open when output == 'db'.
        dbpwd: database password; accepted for interface compatibility but
            not currently used by open_db below.
        *args, **kwargs: forwarded to the parent spider class.
    """
    super(MySpider, self).__init__(*args, **kwargs)
    self.emailList = []
    self.output = output
    if output == 'db':
        # Keep a reference to the connection itself, not just its cursor:
        # previously the connection object was unreachable, so it could
        # never be committed or closed.
        self.dbconn = open_db(dbname)
        self.dbcursor = self.dbconn.cursor()
    else:
        # 'wb' is correct for the csv module under Python 2 -- NOTE(review):
        # would need newline=''/text mode if this file is ever ported to 3.
        self.f = open('results.list', 'wb')
        self.csvWriter = csv.writer(self.f, delimiter='\t')
def run():
    """Smoke-test the DB helpers with sample data.

    Inserts sample Business and Email rows (single insert and batch
    insert), then fetches and prints the Email rows matching address
    'a@b'. Side effects only; returns None.
    """
    with open_db("bizsearch") as conn:
        brecords = [
            ["biz2", "biz21.com", "4044898763", "AGY", "US", "P", "Jack0"],
            ["biz3", "biz31.com", "4044898764", "TUR", "US", "P", "Jack2"],
            ["biz4", "biz41.com", "4044898765", "RLS", "US", "P", "Jack3"],
            ["biz5", "biz51.com", "4044898766", "EDU", "US", "P", "Jack4"],
        ]
        Business.insert_one_by_value(conn, "biz1", "biz1.com", "4044898763", "AGY", "US", "P", "Jack")
        Business.insert_a_batch(conn, brecords)
        erecords = [
            ["a@b", 1, "2011-11-11", 1],
            ["a@c", 22, "2011-11-11", 1],
            ["a@d", 23, "2011-11-11", 2],
            ["a@e", 24, "2011-11-11", 3],
        ]
        Email.insert_one_by_value(conn, "a@b", 1, "2011-11-11", 0)
        Email.insert_a_batch(conn, erecords)
        results = Email.fetch_by(conn, ["*"], address="a@b")
        for r in results:
            # Single-argument print() is portable across Python 2 and 3,
            # unlike the Python-2-only `print r` statement form.
            print(r)
def __init__(self):
    """Open the 'bizsearch_work' database and hold both the connection
    and a cursor on it for later queries."""
    connection = open_db("bizsearch_work")
    self.conn = connection
    self.dbcursor = connection.cursor()