def __init__(self):
    """Connect to the database, ensure the deals table exists, and
    keep a session factory bound to the engine."""
    db_engine = db_connect()
    create_deals_table(db_engine)
    self.Session = sessionmaker(bind=db_engine)
def getallbehaviorsbygid(query): engine = db_connect() Session = sessionmaker(bind=engine) session = Session() behaviors = [] myquery = session.query(Behaviors).filter(Behaviors.tid==query['gid']).filter(Behaviors.uid>0).filter(Behaviors.bcode!=0).filter(Behaviors.bcode!=1).filter(Behaviors.bcode!=2).filter(Behaviors.bcode!=3).filter(Behaviors.bcode!=5).filter(Behaviors.bcode!=11).filter(Behaviors.bcode!=12).filter(Behaviors.tcode!=0).filter(Behaviors.tcode!=3).filter(Behaviors.tcode!=4) total = myquery.count() for instance in myquery.order_by(desc(Behaviors.t)).offset(query['offset']).limit(query['limit']): behavior = {} behavior['uid'] = instance.uid behavior['t'] = instance.t behavior['bcode'] = instance.bcode behavior['tcode'] = instance.tcode behavior['tid'] = instance.tid behaviors.append(behavior) #endfor session.close() hasMore = 1 number = int(query['offset']) + int(query['limit']) if (number>=total): hasMore = 0 print 'get behaviors by gid - ', query['offset'], query['limit'], number, total, hasMore return { 'numFound':total, 'hasMore':hasMore, 'behaviors':behaviors}
def __init__(self):
    """Start file logging and bind a session factory to the DB engine.

    BUG FIX: the original swallowed every setup exception with a bare
    ``pass``, leaving self.Session unset and deferring the failure to a
    confusing AttributeError later; the error is now logged.
    """
    log.start(settings.LOG_FILE)
    try:
        engine = db_connect()
        self.Session = sessionmaker(bind=engine)
    except Exception as e:
        # Surface the failure instead of hiding it; self.Session stays unset.
        log.msg("Database initialization failed: %s" % e)
def open_spider(self, spider):
    """Copy the spider's pipelinePostgres flag; when it equals 'true'
    (case-insensitive), connect to the database, ensure the memex table
    exists, and bind a session factory."""
    self.pipelinePostgres = spider.pipelinePostgres
    flag = self.pipelinePostgres
    if flag and flag.lower() == 'true':
        engine = db_connect()
        create_memex_table(engine)
        self.Session = sessionmaker(bind=engine)
def __init__(self, obj):
    """Worker thread that owns its own engine and DB session.

    Args:
        obj: payload the thread will process; stored on the instance.
    """
    threading.Thread.__init__(self)
    self.obj = obj
    self.engine = db_connect()
    create_all_tables(self.engine)
    self.Session = sessionmaker(bind=self.engine)
    self.session = self.Session()
def __init__(self):
    """Bind a session factory to a fresh database engine."""
    db_engine = db_connect()
    self.Session = sessionmaker(bind=db_engine)
def query_output(rs): res = [] engine = db_connect() Session = sessionmaker(bind=engine) session = Session() for i in rs: data = dict() cid = i[1][:i[1].rfind(".xml")] print cid, value = session.query(Videos).filter(Videos.cid==cid).first() if value: data = value.as_dict() data["danmu"] = extract_danmu_example(i[2]) data["score"] = i[0] T_index = data["startDate"].find('T') data["date"] = data["startDate"][:T_index] data["hour"] = data["startDate"][T_index+1:] data["tag_list"] = extract_tag(data["tag_list"]) data["u_face"] = extract_u_face(data["upinfo"]) data["r_info"] = extract_r_info(data["upinfo"]) res.append(data) print res[0]["cid"] print len(res),"results find!" return res
def setUp(self):
    """Create a pipeline, a representative scraped item, and a DB handle."""
    self.pipeline = PdlScraperPipeline()
    congresistas = (u'Espinoza Cruz Marisol,Abugattás '
                    u'Majluf Daniel Fernando,Acha Roma'
                    u'ni Walter,Apaza Condori Emiliano,'
                    u'Nayap Kinin Eduardo,Reynaga'
                    u'Soto Jhon Arquimides,Valencia '
                    u'Quiroz Jaime Ruben')
    seguimientos = ['', u'28/08/2014 Decretado a... Economía', u' ']
    self.item = dict(
        fecha_presentacion=u'10/10/2013',
        codigo=u'11111111111',
        numero_proyecto=u'11111111111/2014-CR',
        short_url=u'',
        titulo='',
        expediente='',
        pdf_url='',
        time_created='',
        time_edited=datetime.date.today(),
        seguimiento_page='',
        grupo_parlamentario='',
        iniciativas_agrupadas=u'00154, 00353, 00368, 00484, 00486',
        nombre_comision='',
        numero_de_ley='',
        titulo_de_ley='',
        proponente='',
        congresistas=congresistas,
        seguimientos=seguimientos,
    )
    self.db = db_connect()
def connect_db():
    """Create the engine and tables, then publish a scoped session factory
    through the module-level SESSION global."""
    global SESSION
    engine = db_connect()
    create_table(engine)
    SESSION = scoped_session(sessionmaker(bind=engine))
    print('Connect db')
def importCalpendoIntoRMC(monthYear): result = run_query("call billingCalpendoByMonth('{monthYear}%')".format(monthYear=monthYear), "calpendo") s = db_connect("rmc") for row in result: row = list(row) for idx, val in enumerate(row): try: row[idx] = pymysql.escape_string(unicode(val)) except UnicodeDecodeError: row[idx] = pymysql.escape_string(val.decode('iso-8859-1')) entry = Ris(accession_no=row[0], gco=row[1], project=row[2], MRN=row[3], PatientsName=row[4], BirthDate=row[5], target_organ=row[6], target_abbr=row[7], ScanDate=row[8], referring_md=row[9], Duration=row[10], ScanDTTM=row[11], CompletedDTTM=row[12], Resource=row[13]) s.add(entry) try: s.commit() except IntegrityError: print "Warning: Duplicate row detected in ris table." s.rollback() else: examEntry = Examcodes(target_abbr=row[7], target_organ=row[6]) s.add(examEntry) try: s.commit() except IntegrityError: print "Warning: Examcode already exists." s.rollback() return result
def main(): """Index alexa demographics """ engine = db_connect() Session = sessionmaker(bind=engine) session = Session() settings = get_project_settings() settings.set('ITEM_PIPELINES', {'demographic_scraper.demographic_scraper.pipelines.WebsiteDemographicPipeline': 300}) settings.set('EXTENSIONS', {'scrapy.telnet.TelnetConsole': None,}) process = CrawlerProcess(settings) for website in session.query(WebsitesContent).all(): demographic = list(session.query(Websites).filter_by(link=website.link)) if len(demographic) is 0: url = website.link print website.link AlexaSpider.name = url process.crawl(AlexaSpider, url=url, db_session=session) process.start() process.stop() session.close()
def __init__(self):
    """Connect to the database, ensure the deals table exists, and keep
    a session factory for process_item."""
    engine = db_connect()
    create_deals_table(engine)
    self.Session = sessionmaker(bind=engine)

def process_item(self, item, spider):
    """Persist one deal; called by Scrapy for every item.

    Rolls back and re-raises on failure; the session is always closed.
    """
    session = self.Session()
    deal = Deals(**item)
    try:
        session.add(deal)
        session.commit()
    except:
        session.rollback()
        raise
    finally:
        session.close()
    return item
def createWeixiaoSimTask(self, potentialItem, items): print 'begin createWeixiaoSimTask...' # create json string json_task = json.dumps({ 'item': potentialItem, 'existing': items }, separators=(',', ':')) print json_task # and put it to lelesimtask table of lelespider, FIXME #WeixiaoTaskService.addSimTask(json_task) engine = db_connect() Session = sessionmaker(bind=engine) session = Session() new_task = {} new_task['jsontask'] = json_task new_task['date'] = potentialItem['date'] new_task['time'] = potentialItem['time'] new_task['same'] = False new_task['status'] = '0' simtask = SimTask(**new_task) session.add(simtask) session.commit() session.close() print 'end createWeixiaoSimTask...'
def __init__(self):
    """Set up the engine, ensure the properties table exists, and bind
    a session factory."""
    db_engine = db_connect()
    create_properties_table(db_engine)
    self.Session = sessionmaker(bind=db_engine)
def process_staged_urls():
    """Download all staged URLs and update Catalog and Document records.

    For each UrlStage row: resolve its Place and Event, then either attach
    a Document reference to an existing Catalog entry (matched by url_hash)
    or download the file, create the Catalog entry, and add the reference.
    """
    engine = db_connect()
    create_tables(engine)
    Session = sessionmaker(bind=engine)
    session = Session()
    # for event in session.query(EventStage).all():
    #     copy_event_from_stage(event)
    try:
        for url_record in session.query(UrlStage).all():
            place_record = session.query(Place). \
                filter(Place.ocd_division_id == url_record.ocd_division_id).first()
            event_record = session.query(Event). \
                filter(Event.ocd_division_id == url_record.ocd_division_id,
                       Event.record_date == url_record.event_date,
                       Event.name == url_record.event).first()
            # NOTE(review): assumes both lookups succeed; a missing Place or
            # Event would raise AttributeError here — confirm staging data.
            print(f'place id: {place_record.id}\n event_id:{event_record.id}')
            catalog_entry = session.query(Catalog). \
                filter(Catalog.url_hash == url_record.url_hash).first()
            if catalog_entry:
                # Document already exists in catalog: only add the reference.
                catalog_id = catalog_entry.id
                print(f'catalog_id---------{catalog_id}')
                document = map_document(
                    url_record, place_record.id, event_record.id, catalog_id)
                save_record(document)
                print("existing in catalog adding reference to document")
            else:
                print("Does not exist")
                # Download and save document.
                catalog = Catalog(
                    url=url_record.url,
                    url_hash=url_record.url_hash,
                    location='placeholder',
                    filename=f'{url_record.url_hash}.pdf'
                )
                doc = Media(url_record)
                result = doc.gather()  # download
                if result:
                    # Add to doc catalog, then the document reference.
                    catalog.location = result
                    catalog_id = save_record(catalog)
                    document = map_document(
                        url_record, place_record.id, event_record.id, catalog_id)
                    doc_id = save_record(document)
                    print(f'Added {url_record.url_hash} doc_id: {doc_id}')
    finally:
        # BUG FIX: the session was never closed; release it even on error.
        session.close()
def get_my_urls(self):
    """Collect seguimiento pages that still need scraping.

    A project needs scraping when its iniciativas_agrupadas field is
    "empty" (empty list, None, or blank string) and its seguimiento_page
    is non-empty.

    Returns:
        list of seguimiento_page URLs.
    """
    db = db_connect()
    # NOTE(review): LEGISLATURE comes from project settings, presumably a
    # trusted integer — confirm, since it is interpolated into the SQL.
    query = ("select codigo, iniciativas_agrupadas, seguimiento_page "
             "from pdl_proyecto WHERE legislatura={} "
             "order by time_edited".format(settings.LEGISLATURE))
    start_urls = []
    for row in db.query(query):
        iniciativas = row['iniciativas_agrupadas']
        # Branch order matters: strip() only exists on strings.
        if isinstance(iniciativas, list):
            is_empty = len(iniciativas) < 1
        elif iniciativas is None:
            is_empty = True
        else:
            is_empty = iniciativas.strip() == ''
        # The page-nonempty check was duplicated three times; do it once.
        if is_empty and row['seguimiento_page'] != '':
            start_urls.append(row['seguimiento_page'])
    return start_urls
def __init__(self):
    """Connect to the database, create all tables, and bind a session factory."""
    db_engine = db_connect()
    create_tables(db_engine)
    self.Session = sessionmaker(bind=db_engine)
def insert_item(item):
    """Insert or merge scraped job rows.

    Sponsored posts are deduplicated by (title, sponsored); organic posts
    (sponsored is None) are deduplicated by post_url. When a duplicate
    exists, add_filter_salary merges the salary data instead of inserting.
    """
    engine = db_connect()
    create_table(engine)
    session = sessionmaker(bind=engine)()
    for data in item:
        job = JobInfo(**data)
        if data['sponsored']:
            dup_query = session.query(JobInfo).filter(
                JobInfo.title == data['title'],
                JobInfo.sponsored == data['sponsored'])
            if dup_query.all():
                for instance in dup_query:
                    add_filter_salary(data, instance, dup_query, session)
            else:
                session.add(job)
                session.commit()
        if data['sponsored'] is None:
            dup_query = session.query(JobInfo).filter(
                JobInfo.post_url == data['post_url'])
            if dup_query.all():
                for instance in dup_query:
                    add_filter_salary(data, instance, dup_query, session)
            else:
                session.add(job)
                session.commit()
    session.close()
def createWeixiaoSimTask(self, potentialItem, items): print 'begin createWeixiaoSimTask...' # create json string json_task = json.dumps({'item':potentialItem, 'existing':items}, separators=(',',':')) print json_task # and put it to lelesimtask table of lelespider, FIXME #WeixiaoTaskService.addSimTask(json_task) engine = db_connect() Session = sessionmaker(bind=engine) session = Session() new_task = {} new_task['jsontask'] = json_task new_task['date'] = potentialItem['date'] new_task['time'] = potentialItem['time'] new_task['same'] = False new_task['status'] = '0' simtask = SimTask(**new_task) session.add(simtask) session.commit() session.close() print 'end createWeixiaoSimTask...'
def __init__(self):
    """Keep the engine on the instance, create all tables, and bind a
    session factory to it."""
    self.engine = db_connect()
    create_tables(self.engine)
    self.Session = sessionmaker(bind=self.engine)
def __init__(self):
    """Connect to the database, ensure the poetryandalcohol table exists,
    and bind a session factory."""
    db_engine = db_connect()
    create_poetry_table(db_engine)
    self.Session = sessionmaker(bind=db_engine)
def __init__(self):
    """Set up the database session factory and ensure the URL table exists."""
    db_engine = db_connect()
    create_url_table(db_engine)
    self.Session = sessionmaker(bind=db_engine)
def __init__(self):
    """Connect to the database, create the tables, and store the session
    factory on self.factory."""
    db_engine = db_connect()
    create_table(db_engine)
    self.factory = sessionmaker(bind=db_engine)
def __init__(self):
    """Connect to the database, ensure the clinicas table exists, and
    bind a session factory."""
    db_engine = db_connect()
    create_clinicas_table(db_engine)
    self.Session = sessionmaker(bind=db_engine)
def process_item(self, item, spider):
    """Write the item to pdl_proyecto (keyed by codigo), but only when it
    came from the 'updater' spider; every item is passed through."""
    if spider.name == 'updater':
        log.msg("Try saving item to database: %s." % item['codigo'])
        db = db_connect()
        db['pdl_proyecto'].update(item, ['codigo'])
    return item
def __init__(self, *args, **kwargs):
    """Load the existing Idealista listings and set a Spanish locale
    before finishing normal spider initialization."""
    db_engine = db_connect()
    create_deals_table(db_engine)
    db_session = sessionmaker(bind=db_engine)()
    self.listings = db_session.query(Idealista).all()
    db_session.close()
    # Spanish month/day names are needed when parsing listing dates.
    locale.setlocale(locale.LC_TIME, "es_ES")
    super(IdealistaSpider, self).__init__(*args, **kwargs)
def __init__(self):
    """Initialize the database connection, ensure the jobs table exists,
    and bind a session factory."""
    db_engine = db_connect()
    create_jobs_table(db_engine)
    self.Session = sessionmaker(bind=db_engine)
def __init__(self):
    """Connect to the database, ensure the tc_tickets table exists, and
    bind a session factory."""
    db_engine = db_connect()
    create_tc_tickets_table(db_engine)
    self.Session = sessionmaker(bind=db_engine)
def __init__(self):
    """Create every table on a fresh engine and keep a bound session factory."""
    db_engine = db_connect()
    create_tables(db_engine)
    self.Session = sessionmaker(bind=db_engine)
def __init__(self):
    """Initialize the database connection, create all tables, and bind
    a session factory."""
    db_engine = db_connect()
    create_tables(db_engine)
    self.Session = sessionmaker(bind=db_engine)
def __init__(self):
    """Connect with the configured engine, ensure the deals table exists,
    and bind a session factory."""
    db_engine = db_connect()
    create_deals_table(db_engine)
    self.Session = sessionmaker(bind=db_engine)
def view_recommendations(recommendations, number_of_recommendations=20):
    """Join recommendation scores with track metadata and return the top rows.

    Args:
        recommendations: Series of scores indexed by track id.
        number_of_recommendations: number of top-scoring rows to return.

    Returns:
        DataFrame of the highest-scoring tracks with their metadata.
    """
    track_info = get_track_info(db_connect())
    joined = recommendations.to_frame().join(track_info)
    # BUG FIX: DataFrame.sort_index(by=...) was deprecated and later removed;
    # sorting by a column is DataFrame.sort_values.
    return (joined.sort_values(by='score', ascending=False)
            .head(number_of_recommendations))
def __init__(self):
    """Recreate the booking table from scratch (drop, then create) and
    bind a session factory."""
    db_engine = db_connect()
    drop_booking_table(db_engine)
    create_booking_table(db_engine)
    self.Session = sessionmaker(bind=db_engine)
def __init__(self):
    """Initialize the DB connection and session maker; ensure the deals
    table exists."""
    db_engine = db_connect()
    create_deals_table(db_engine)
    self.Session = sessionmaker(bind=db_engine)
def __init__(self):
    """Connect to the database, create the deals table if needed, and
    bind a session factory to the engine."""
    db_engine = db_connect()
    create_deals_table(db_engine)
    self.Session = sessionmaker(bind=db_engine)
def __init__(self):
    """Connect to the database, create the nfl_rosters_2015 tables, and
    bind a session factory."""
    db_engine = db_connect()
    create_tables(db_engine)
    self.Session = sessionmaker(bind=db_engine)
def __init__(self):
    """Connect to the database, create the nfl_te_games_2015 tables if
    missing, and bind a session factory."""
    db_engine = db_connect()
    create_tables(db_engine)
    self.Session = sessionmaker(bind=db_engine)
def __init__(self):
    """Connect to the database, ensure the answers table exists, and
    bind a session factory."""
    db_engine = db_connect()
    create_answers_table(db_engine)
    self.Session = sessionmaker(bind=db_engine)
def __init__(self, import_hours=False):
    """Bind a session factory and remember the import preference.

    Args:
        import_hours: when True, business hours will be imported into the
            hours table in the database.
    """
    db_engine = db_connect()
    self.Session = sessionmaker(bind=db_engine)
    self.import_hours = import_hours
def __init__(self):
    """Connect to the database, ensure the housetrip deals table exists,
    and bind a session factory."""
    db_engine = db_connect()
    create_housetripdeals_table(db_engine)
    self.Session = sessionmaker(bind=db_engine)
def __init__(self):
    """Initialise the database connection, ensure the players table
    exists, and bind a session factory."""
    db_engine = db_connect()
    create_players_table(db_engine)
    self.Session = sessionmaker(bind=db_engine)
def __init__(self):
    """Connect to the database, ensure the jackets table exists, and
    bind a session factory."""
    db_engine = db_connect()
    create_jackets_table(db_engine)
    self.Session = sessionmaker(bind=db_engine)
def __init__(self):
    """Prepare the per-spider file map, connect to the database, ensure
    the deals table exists, and bind a session factory."""
    self.files = {}
    db_engine = db_connect()
    create_deals_table(db_engine)
    self.Session = sessionmaker(bind=db_engine)
def reset():
    """Drop and recreate the playlist-related tables.

    Tables are dropped child-first and recreated parent-first so foreign
    key dependencies are respected in both directions.
    """
    engine = db_connect()
    models = [PlaylistTrack, TrackArtist, Track, Artist]
    for model in models:
        model.__table__.drop(engine, checkfirst=True)
    for model in reversed(models):
        model.__table__.create(engine)
def __init__(self):
    """Connect to the database, ensure the job-post, keyword, and
    job-keyword link tables exist, and bind a session factory."""
    db_engine = db_connect()
    create_job_post_table(db_engine)
    create_keyword_table(db_engine)
    create_job_keyword_table(db_engine)
    self.Session = sessionmaker(bind=db_engine)
def process_item(start=0, numEntries=10):
    """Fetch one page of apparel URLs from the flipkartdata table.

    Args:
        start: row offset of the page.
        numEntries: page size.

    Returns:
        list of absolute product URLs.
    """
    engine = db_connect()
    database = "flipkartdata"
    # Coerce paging arguments to int so the interpolated SQL can only ever
    # contain numbers (the values were previously str()-ed verbatim).
    offset = int(start)
    limit = int(numEntries)
    query = ('select "id","apparelURL" from ' + database +
             ' limit ' + str(limit) + ' offset ' + str(offset))
    rows = engine.execute(query).fetchall()
    return ["http://www.flipkart.com" + row[1] for row in rows]
def __init__(self):
    """Connect to the database, ensure the deals table exists, bind a
    session factory, and publish the zip lookup table globally.

    BUG FIX: the original declared ``global zipDict`` *after* assigning
    the name, which raises "name 'zipDict' is assigned to before global
    declaration" (SyntaxError on Python 3). The declaration now comes first.
    """
    global zipDict
    engine = db_connect()
    create_deals_table(engine)
    self.Session = sessionmaker(bind=engine)
    zipDict = self.createZip()
def test_save_item(self):
    """Saving an item that is already in pdl_proyecto must not fail;
    the fixture row is removed afterwards."""
    db = db_connect()
    table = db['pdl_proyecto']
    # The row must already exist before save_item runs.
    self.assertIsNotNone(table.find_one(codigo=self.item['codigo']))
    self.pipeline.save_item(self.item)
    # Clean up the fixture row.
    table.delete(codigo=self.item['codigo'])
def __init__(self):
    """Initialize the pipeline: engine, tables, a thread-safe scoped
    session, an item counter, and a lock serializing print output."""
    engine = db_connect()
    create_table(engine)
    self.Session = scoped_session(sessionmaker(bind=engine))
    self.number = 1
    self.print_lock = threading.Lock()
def save_item(self, item):
    """Insert a visitor row unless one with the same sha1 already exists,
    stamping created/modified timestamps on insert."""
    table = db_connect()['visitors_visitor']
    if table.find_one(sha1=item['sha1']) is None:
        item['created'] = datetime.datetime.now()
        item['modified'] = datetime.datetime.now()
        table.insert(item)
        logging.info("Saving: {0}, date: {1}".format(item['sha1'], item['date']))
    else:
        logging.info("{0}, date: {1} is found in db, not saving".format(item['sha1'], item['date']))
def save_iniciativas(self, item):
    """Update the pdl_proyecto row matched by ``codigo`` with this item.

    NOTE(review): the log message talks about Seguimientos, but the code
    writes to pdl_proyecto — confirm the intended target table.
    """
    log.msg("Try to save iniciativas.")
    db = db_connect()
    # get proyect id for these seguimientos
    db['pdl_proyecto'].update(item, ['codigo'])