Exemple #1
0
	def __init__(self):
		"""Initialize the database connection and the session factory.

		Passes the new engine to ``create_deals_table`` and then stores a
		``sessionmaker`` bound to that engine on ``self.Session``.
		"""
		db_engine = db_connect()
		create_deals_table(db_engine)
		self.Session = sessionmaker(bind=db_engine)
Exemple #2
0
def getallbehaviorsbygid(query):
    engine = db_connect()
    Session = sessionmaker(bind=engine)
    session = Session()
    behaviors = []
    myquery = session.query(Behaviors).filter(Behaviors.tid==query['gid']).filter(Behaviors.uid>0).filter(Behaviors.bcode!=0).filter(Behaviors.bcode!=1).filter(Behaviors.bcode!=2).filter(Behaviors.bcode!=3).filter(Behaviors.bcode!=5).filter(Behaviors.bcode!=11).filter(Behaviors.bcode!=12).filter(Behaviors.tcode!=0).filter(Behaviors.tcode!=3).filter(Behaviors.tcode!=4)
    total = myquery.count()
    for instance in myquery.order_by(desc(Behaviors.t)).offset(query['offset']).limit(query['limit']):
        behavior = {}
        behavior['uid'] = instance.uid
        behavior['t'] = instance.t
        behavior['bcode'] = instance.bcode
        behavior['tcode'] = instance.tcode
        behavior['tid'] = instance.tid
        behaviors.append(behavior)
    #endfor
    session.close()
   
    hasMore = 1
    number = int(query['offset']) + int(query['limit'])
    if (number>=total):
        hasMore = 0
    
    print 'get behaviors by gid - ', query['offset'], query['limit'], number, total, hasMore    
 
    return { 'numFound':total, 'hasMore':hasMore, 'behaviors':behaviors}
Exemple #3
0
 def __init__(self):
     """Start logging and try to prepare a database session factory.

     Database setup is best effort: a failure does not abort construction,
     but it is now logged instead of being silently discarded. In that case
     ``self.Session`` is left unset by this method.
     """
     log.start(settings.LOG_FILE)
     try:
         engine = db_connect()
         self.Session = sessionmaker(bind=engine)
     except Exception as e:
         # Keep the original non-fatal behaviour, but leave a trace.
         log.msg("Database session setup failed: %s" % e, level=log.ERROR)
 def open_spider(self, spider):
     """Copy the spider's ``pipelinePostgres`` flag and act on it.

     Only when the flag is a non-empty string equal to ``'true'`` (case
     insensitive) is the engine opened, ``create_memex_table`` called and
     ``self.Session`` (re)bound.
     """
     self.pipelinePostgres = spider.pipelinePostgres
     flag = self.pipelinePostgres
     if not flag or flag.lower() != 'true':
         return
     db_engine = db_connect()
     create_memex_table(db_engine)
     self.Session = sessionmaker(bind=db_engine)
Exemple #5
0
 def __init__(self, obj):
     """Initialize the thread, open a database session and keep ``obj``.

     Args:
         obj: stored unchanged on ``self.obj``.
     """
     threading.Thread.__init__(self)
     self.engine = db_engine = db_connect()
     create_all_tables(db_engine)
     self.Session = session_factory = sessionmaker(bind=db_engine)
     # One session is opened immediately and kept for the thread's use.
     self.session = session_factory()
     self.obj = obj
Exemple #6
0
 def __init__(self):
     """Initialize the database connection and the session factory.

     No table-creation helper is called here; only ``self.Session`` is set.
     """
     self.Session = sessionmaker(bind=db_connect())
def query_output(rs):
    res = [] 
    engine = db_connect()
    Session = sessionmaker(bind=engine)
    session = Session()

    for i in rs:
        data = dict()
        cid = i[1][:i[1].rfind(".xml")]
        print cid,
        value = session.query(Videos).filter(Videos.cid==cid).first()
        if value:
            data = value.as_dict()
            data["danmu"] = extract_danmu_example(i[2])
            data["score"] = i[0]
            T_index = data["startDate"].find('T')
            data["date"] = data["startDate"][:T_index]
            data["hour"] = data["startDate"][T_index+1:]
            data["tag_list"] = extract_tag(data["tag_list"])
            data["u_face"] = extract_u_face(data["upinfo"])
            data["r_info"] = extract_r_info(data["upinfo"])
            res.append(data)

    print res[0]["cid"]
    print len(res),"results find!"
    return res
Exemple #8
0
 def setUp(self):
     """Create the pipeline under test, a sample item and a database handle."""
     self.pipeline = PdlScraperPipeline()
     # Assembled from adjacent literals exactly as before.
     # NOTE(review): 'Reynaga' and 'Soto' join without a space - confirm intended.
     congresistas = (
         u'Espinoza Cruz  Marisol,Abugattás '
         u'Majluf  Daniel Fernando,Acha Roma'
         u'ni  Walter,Apaza Condori  Emiliano,'
         u'Nayap Kinin  Eduardo,Reynaga'
         u'Soto  Jhon Arquimides,Valencia '
         u'Quiroz  Jaime Ruben'
     )
     seguimientos = [
         '',
         u'28/08/2014 Decretado a... Economía',
         u' ',
     ]
     self.item = {
         'fecha_presentacion': u'10/10/2013',
         'codigo': u'11111111111',
         'numero_proyecto': u'11111111111/2014-CR',
         'short_url': u'',
         'titulo': '',
         'expediente': '',
         'pdf_url': '',
         'time_created': '',
         'time_edited': datetime.date.today(),
         'seguimiento_page': '',
         'grupo_parlamentario': '',
         'iniciativas_agrupadas': u'00154, 00353, 00368, 00484, 00486',
         'nombre_comision': '',
         'numero_de_ley': '',
         'titulo_de_ley': '',
         'proponente': '',
         'congresistas': congresistas,
         'seguimientos': seguimientos,
     }
     self.db = db_connect()
Exemple #9
0
def connect_db():
    """Open the database, call ``create_table`` and publish a scoped session.

    The module-level ``SESSION`` is rebound to a ``scoped_session`` wrapping
    a ``sessionmaker`` bound to the new engine.
    """
    global SESSION
    db_engine = db_connect()
    create_table(db_engine)
    SESSION = scoped_session(sessionmaker(bind=db_engine))
    print('Connect db')
Exemple #10
0
def importCalpendoIntoRMC(monthYear):
    """Copy one month of Calpendo billing rows into the ``rmc`` database.

    Runs the ``billingCalpendoByMonth`` procedure for ``monthYear`` and, for
    every returned row, inserts a ``Ris`` entry. When that insert commits, an
    ``Examcodes`` entry is also inserted from columns 7 and 6 of the row.
    Integrity errors on either insert are reported and rolled back.

    Args:
        monthYear: value interpolated into the procedure call, followed by '%'.

    Returns:
        The unmodified result of the procedure call.
    """
    procedure_call = "call billingCalpendoByMonth('{monthYear}%')".format(monthYear=monthYear)
    result = run_query(procedure_call, "calpendo")
    s = db_connect("rmc")

    for record in result:
        fields = []
        for val in record:
            try:
                escaped = pymysql.escape_string(unicode(val))
            except UnicodeDecodeError:
                # Byte strings that are not ASCII are decoded as latin-1.
                escaped = pymysql.escape_string(val.decode('iso-8859-1'))
            fields.append(escaped)

        s.add(Ris(
            accession_no=fields[0], gco=fields[1], project=fields[2],
            MRN=fields[3], PatientsName=fields[4], BirthDate=fields[5],
            target_organ=fields[6], target_abbr=fields[7], ScanDate=fields[8],
            referring_md=fields[9], Duration=fields[10], ScanDTTM=fields[11],
            CompletedDTTM=fields[12], Resource=fields[13]))
        try:
            s.commit()
        except IntegrityError:
            print("Warning: Duplicate row detected in ris table.")
            s.rollback()
            continue

        # Only reached when the Ris row was stored.
        s.add(Examcodes(target_abbr=fields[7], target_organ=fields[6]))
        try:
            s.commit()
        except IntegrityError:
            print("Warning: Examcode already exists.")
            s.rollback()
    return result
def main():
    """Index alexa demographics.

    Schedules one ``AlexaSpider`` crawl for every ``WebsitesContent`` link
    that has no ``Websites`` row yet, then runs the crawler process. The
    database session is closed even when scheduling or crawling fails.
    """

    engine = db_connect()
    Session = sessionmaker(bind=engine)
    session = Session()

    settings = get_project_settings()
    settings.set('ITEM_PIPELINES',
                 {'demographic_scraper.demographic_scraper.pipelines.WebsiteDemographicPipeline': 300})
    settings.set('EXTENSIONS',
                 {'scrapy.telnet.TelnetConsole': None,})

    process = CrawlerProcess(settings)
    try:
        for website in session.query(WebsitesContent).all():
            known = session.query(Websites).filter_by(link=website.link).first()
            # Was `len(list(...)) is 0`, an identity test on an int.
            if known is None:
                url = website.link
                print(website.link)
                # NOTE(review): this rebinds the class attribute, so all
                # scheduled crawls share the last name set here - confirm.
                AlexaSpider.name = url
                process.crawl(AlexaSpider, url=url, db_session=session)
        process.start()
        process.stop()
    finally:
        session.close()
Exemple #12
0
	def __init__(self):
		"""Initialize the database connection and the session factory.

		Passes the engine to ``create_deals_table`` and stores a
		``sessionmaker`` bound to it on ``self.Session``.
		"""
		engine = db_connect()
		create_deals_table(engine)
		self.Session = sessionmaker(bind=engine)

	# Previously nested inside __init__, so it was never defined as a method.
	def process_item(self, item, spider):
		"""Save a deal in the database.

		Args:
			item: mapping expanded into the ``Deals`` constructor.
			spider: unused here.

		Returns:
			The same ``item``, once the commit has succeeded.

		Raises:
			Whatever ``add`` or ``commit`` raises, after a rollback.
		"""
		session = self.Session()
		deal = Deals(**item)

		try:
			session.add(deal)
			session.commit()
		except:
			session.rollback()
			raise
		finally:
			session.close()

		# Kept out of `finally`: returning from there discarded the re-raise.
		return item
Exemple #13
0
    def createWeixiaoSimTask(self, potentialItem, items):
        """Store a similarity task for ``potentialItem`` against ``items``.

        Both arguments are serialised into one compact JSON string, which is
        saved in a new ``SimTask`` row with the item's ``date`` and ``time``,
        ``same=False`` and ``status='0'``.

        Args:
            potentialItem: JSON-serialisable mapping with ``'date'`` and
                ``'time'`` keys.
            items: JSON-serialisable collection of existing items.

        Raises:
            Whatever the session raises on add/commit, after a rollback.
        """
        print('begin createWeixiaoSimTask...')
        # create json string
        json_task = json.dumps({'item': potentialItem, 'existing': items},
                               separators=(',', ':'))
        print(json_task)

        simtask = SimTask(
            jsontask=json_task,
            date=potentialItem['date'],
            time=potentialItem['time'],
            same=False,
            status='0',
        )

        # and put it to lelesimtask table of lelespider, FIXME
        engine = db_connect()
        Session = sessionmaker(bind=engine)
        session = Session()
        try:
            session.add(simtask)
            session.commit()
        except Exception:
            # Leave no half-finished transaction behind.
            session.rollback()
            raise
        finally:
            session.close()

        print('end createWeixiaoSimTask...')
Exemple #14
0
 def __init__(self):
     """Initialize the database connection and the session factory.

     Passes the engine to ``create_properties_table`` before binding
     ``self.Session`` to it.
     """
     db_engine = db_connect()
     create_properties_table(db_engine)
     self.Session = sessionmaker(bind=db_engine)
def process_staged_urls():
    """Process every staged URL into Catalog and Document records.

    For each ``UrlStage`` row the matching ``Place`` and ``Event`` are looked
    up. If the URL hash is already in ``Catalog``, only a document reference
    is saved. Otherwise the document is fetched with ``Media.gather`` and,
    when that returns a truthy location, a catalog entry and a document
    reference are both saved.

    Staged rows without a matching place or event are reported and skipped
    instead of raising ``AttributeError``. The session is always closed.
    """

    engine = db_connect()
    create_tables(engine)
    Session = sessionmaker(bind=engine)
    session = Session()

    # for event in session.query(EventStage).all():
    #     copy_event_from_stage(event)

    try:
        for url_record in session.query(UrlStage).all():
            place_record = session.query(Place). \
                filter(Place.ocd_division_id == url_record.ocd_division_id).first()
            event_record = session.query(Event). \
                filter(Event.ocd_division_id == url_record.ocd_division_id,
                       Event.record_date == url_record.event_date,
                       Event.name == url_record.event).first()

            # .first() yields None when nothing matches.
            if place_record is None or event_record is None:
                print(f'Skipping {url_record.url_hash}: no matching place or event')
                continue
            print(f'place id: {place_record.id}\n event_id:{event_record.id}')

            catalog_entry = session.query(Catalog). \
                filter(Catalog.url_hash == url_record.url_hash).first()

            # Document already exists in catalog
            if catalog_entry:
                catalog_id = catalog_entry.id
                print(f'catalog_id---------{catalog_id}')
                document = map_document(
                    url_record, place_record.id, event_record.id, catalog_id)
                save_record(document)
                print("existing in catalog adding reference to document")
                continue

            print("Does not exist")

            # Download and save document
            catalog = Catalog(
                url=url_record.url,
                url_hash=url_record.url_hash,
                location='placeholder',
                filename=f'{url_record.url_hash}.pdf'
                )

            doc = Media(url_record)

            # download
            result = doc.gather()

            # Add to doc catalog
            if result:
                catalog.location = result
                catalog_id = save_record(catalog)
                # Add document reference
                document = map_document(
                    url_record, place_record.id, event_record.id, catalog_id)
                doc_id = save_record(document)

                print(f'Added {url_record.url_hash} doc_id: {doc_id}')
    finally:
        session.close()
Exemple #16
0
    def get_my_urls(self):
        """Return the follow-up pages of projects with no grouped initiatives.

        Reads ``codigo``, ``iniciativas_agrupadas`` and ``seguimiento_page``
        from ``pdl_proyecto`` for ``settings.LEGISLATURE``, ordered by
        ``time_edited``. A row's ``seguimiento_page`` is kept when it is not
        the empty string and ``iniciativas_agrupadas`` is ``None``, an empty
        list, or a string that is blank after stripping.

        Returns:
            list of ``seguimiento_page`` values, in query order.
        """
        db = db_connect()

        query = "select codigo, iniciativas_agrupadas, seguimiento_page " \
                "from pdl_proyecto WHERE legislatura={} order by time_edited".format(settings.LEGISLATURE)

        start_urls = []
        for row in db.query(query):
            iniciativas = row['iniciativas_agrupadas']
            if iniciativas is None:
                is_blank = True
            elif isinstance(iniciativas, list):
                is_blank = not iniciativas
            else:
                is_blank = iniciativas.strip() == ''

            # this field is empty, scrape it!
            if is_blank and row['seguimiento_page'] != '':
                start_urls.append(row['seguimiento_page'])

        return start_urls
Exemple #17
0
 def __init__(self):
     """Initialize the database connection and the session factory.

     ``create_tables`` receives the engine before ``self.Session`` is bound.
     """
     db_engine = db_connect()
     create_tables(db_engine)
     self.Session = sessionmaker(bind=db_engine)
Exemple #18
0
def insert_item(item):
    """Insert each job record, or merge it into rows that already match.

    Records with a truthy ``'sponsored'`` value are matched on title and
    sponsored; records whose ``'sponsored'`` is ``None`` are matched on
    ``post_url``. With no match the new ``JobInfo`` is added and committed;
    otherwise ``add_filter_salary`` is called for every matching row.

    Args:
        item: iterable of dicts accepted by the ``JobInfo`` constructor.
    """
    db_engine = db_connect()
    create_table(db_engine)
    session = sessionmaker(bind=db_engine)()

    def _store_or_merge(data, job, matches):
        # The query runs once for the existence check and once for the loop.
        if not matches.all():
            session.add(job)
            session.commit()
            return
        for instance in matches:
            add_filter_salary(data, instance, matches, session)

    for data in item:
        job = JobInfo(**data)

        if data['sponsored']:
            _store_or_merge(data, job, session.query(JobInfo).filter(
                JobInfo.title == data['title'],
                JobInfo.sponsored == data['sponsored']))

        # Deliberately a second `if`, evaluated after the branch above.
        if data['sponsored'] is None:
            _store_or_merge(data, job, session.query(JobInfo).filter(
                JobInfo.post_url == data['post_url']))
    session.close()
Exemple #19
0
    def createWeixiaoSimTask(self, potentialItem, items):
        """Store a similarity task for ``potentialItem`` against ``items``.

        The pair is dumped to one compact JSON string and saved in a new
        ``SimTask`` row with the item's ``date`` and ``time``, ``same=False``
        and ``status='0'``.

        Args:
            potentialItem: JSON-serialisable mapping with ``'date'`` and
                ``'time'`` keys.
            items: JSON-serialisable collection of existing items.

        Raises:
            Whatever the session raises on add/commit, after a rollback.
        """
        print('begin createWeixiaoSimTask...')
        # create json string
        json_task = json.dumps({'item':potentialItem, 'existing':items}, separators=(',',':'))
        print(json_task)

        new_task = {
            'jsontask': json_task,
            'date': potentialItem['date'],
            'time': potentialItem['time'],
            'same': False,
            'status': '0',
        }
        simtask = SimTask(**new_task)

        # and put it to lelesimtask table of lelespider, FIXME
        engine = db_connect()
        Session = sessionmaker(bind=engine)
        session = Session()
        try:
            session.add(simtask)
            session.commit()
        except Exception:
            # Undo the failed transaction before the error propagates.
            session.rollback()
            raise
        finally:
            session.close()

        print('end createWeixiaoSimTask...')
 def __init__(self):
     """Initialize the database connection and the session factory.

     The engine is kept on ``self.engine``, handed to ``create_tables`` and
     used to bind ``self.Session``.
     """
     self.engine = db_engine = db_connect()
     create_tables(db_engine)
     self.Session = sessionmaker(bind=db_engine)
Exemple #21
0
 def __init__(self):
     """Initialize the database connection and the session factory.

     Calls ``create_poetry_table`` with the engine before binding
     ``self.Session``.
     """
     db_engine = db_connect()
     create_poetry_table(db_engine)
     self.Session = sessionmaker(bind=db_engine)
 def __init__(self):
     """Initialize the database session factory.

     Calls ``create_url_table`` with the engine, then binds ``self.Session``.
     """
     db_engine = db_connect()
     create_url_table(db_engine)
     self.Session = sessionmaker(bind=db_engine)
 def __init__(self):
     """Initialize the database connection and the session factory.

     Calls ``create_table`` with the engine. The ``sessionmaker`` is stored
     on ``self.factory`` (not ``self.Session``).
     """
     db_engine = db_connect()
     create_table(db_engine)
     self.factory = sessionmaker(bind=db_engine)
Exemple #24
0
 def __init__(self):
     """Initialize the database connection and the session factory.

     Calls ``create_clinicas_table`` with the engine before binding
     ``self.Session``.
     """
     db_engine = db_connect()
     create_clinicas_table(db_engine)
     self.Session = sessionmaker(bind=db_engine)
 def process_item(self, item, spider):
     """Update the stored project for items coming from the 'updater' spider.

     For any other spider the item passes through untouched. For 'updater'
     the ``pdl_proyecto`` row is updated using ``codigo`` as the key.

     Returns:
         The ``item`` that was passed in.
     """
     if spider.name == 'updater':
         log.msg("Try saving item to database: %s." % item['codigo'])
         proyectos = db_connect()['pdl_proyecto']
         proyectos.update(item, ['codigo'])
     return item
Exemple #26
0
 def __init__(self, *args, **kwargs):
     """Preload all ``Idealista`` rows, set the time locale, then init the spider.

     The rows are read through a short-lived session and kept on
     ``self.listings``. ``LC_TIME`` is set to ``es_ES`` for the process.
     Positional and keyword arguments go to the parent constructor unchanged.
     """
     db_engine = db_connect()
     create_deals_table(db_engine)
     db_session = sessionmaker(bind=db_engine)()
     self.listings = db_session.query(Idealista).all()
     db_session.close()
     locale.setlocale(locale.LC_TIME, "es_ES")
     super(IdealistaSpider, self).__init__(*args, **kwargs)
 def __init__(self):
     """Initialize the database session factory.

     Hands the engine to ``create_url_table`` and binds ``self.Session``.
     """
     db_engine = db_connect()
     create_url_table(db_engine)
     self.Session = sessionmaker(bind=db_engine)
Exemple #28
0
	def __init__(self):
		"""Initialize the database connection and the session factory.

		Calls ``create_jobs_table`` with the engine before binding
		``self.Session``.
		"""
		db_engine = db_connect()
		create_jobs_table(db_engine)
		self.Session = sessionmaker(bind=db_engine)
 def __init__(self):
     """Initialize the database connection and the session factory.

     Calls ``create_tc_tickets_table`` with the engine before binding
     ``self.Session``.
     """
     db_engine = db_connect()
     create_tc_tickets_table(db_engine)
     self.Session = sessionmaker(bind=db_engine)
 def __init__(self):
     """Initialize the database connection and the session factory.

     Calls ``create_tables`` with the engine before binding ``self.Session``.
     """
     db_engine = db_connect()
     create_tables(db_engine)
     self.Session = sessionmaker(bind=db_engine)
 def process_item(self, item, spider):
     """Update the stored project when the item comes from the 'updater' spider.

     Items from other spiders are returned without any database access. For
     'updater', the ``pdl_proyecto`` row keyed by ``codigo`` is updated.

     Returns:
         The ``item`` that was passed in.
     """
     if spider.name != 'updater':
         return item
     log.msg("Try saving item to database: %s." % item['codigo'])
     proyectos = db_connect()['pdl_proyecto']
     proyectos.update(item, ['codigo'])
     return item
Exemple #32
0
 def __init__(self):
     """Initialize the database connection and the session factory.

     The engine is passed to ``create_tables`` and then used to bind
     ``self.Session``.
     """
     db_engine = db_connect()
     create_tables(db_engine)
     self.Session = sessionmaker(bind=db_engine)
	def __init__(self):
		"""Initialize the database connection and the session factory.

		Calls ``create_deals_table`` with the engine, then stores a
		``sessionmaker`` bound to that engine on ``self.Session``.
		"""
		db_engine = db_connect()
		create_deals_table(db_engine)
		self.Session = sessionmaker(bind=db_engine)
Exemple #34
0
def view_recommendations(recommendations, number_of_recommendations=20):
    """Return the top-scoring recommendations joined with track details.

    Args:
        recommendations: object exposing ``to_frame()``; the resulting frame
            must contain a ``'score'`` column after the join (presumably a
            pandas Series named ``score`` - confirm against the caller).
        number_of_recommendations: how many rows to keep.

    Returns:
        The first ``number_of_recommendations`` rows, ordered by ``score``
        from highest to lowest.
    """
    recommendations_df = recommendations.to_frame()

    track_info = get_track_info(db_connect())
    recommendations_df = recommendations_df.join(track_info)

    # sort_index(by=...) is gone from current pandas; sort_values replaces it.
    ranked = recommendations_df.sort_values(by='score', ascending=False)
    return ranked.head(number_of_recommendations)
Exemple #35
0
 def __init__(self):
     """Initialize the database connection and the session factory.

     Calls ``drop_booking_table`` and then ``create_booking_table`` with the
     engine on every construction, before binding ``self.Session``.
     """
     db_engine = db_connect()
     drop_booking_table(db_engine)
     create_booking_table(db_engine)
     self.Session = sessionmaker(bind=db_engine)
Exemple #36
0
    def __init__(self):
        """Initialize the database connection and the session factory.

        Calls ``create_jobs_table`` with the engine before binding
        ``self.Session``.
        """
        db_engine = db_connect()
        create_jobs_table(db_engine)
        self.Session = sessionmaker(bind=db_engine)
Exemple #37
0
    def __init__(self):
        """Initialize the database connection and the session factory.

        Calls ``create_deals_table`` with the engine before binding
        ``self.Session``.
        """
        db_engine = db_connect()
        create_deals_table(db_engine)
        self.Session = sessionmaker(bind=db_engine)
Exemple #38
0
 def __init__(self):
     """Initialize the database connection and the session factory.

     Calls ``create_deals_table`` with the engine; ``self.Session`` is the
     ``sessionmaker`` bound to that same engine.
     """
     db_engine = db_connect()
     create_deals_table(db_engine)
     self.Session = sessionmaker(bind=db_engine)
 def __init__(self):
     """Initialize the database connection and the session factory.

     Calls ``create_tables`` with the engine before binding ``self.Session``.
     """
     db_engine = db_connect()
     create_tables(db_engine)
     self.Session = sessionmaker(bind=db_engine)
 def __init__(self):
     """Initialize the database connection and the session factory.

     Passes the engine to ``create_tables`` and then binds ``self.Session``.
     """
     db_engine = db_connect()
     create_tables(db_engine)
     self.Session = sessionmaker(bind=db_engine)
Exemple #41
0
    def __init__(self):
        """Initialize the database connection and the session factory.

        Calls ``create_answers_table`` with the engine before binding
        ``self.Session``.
        """
        db_engine = db_connect()
        create_answers_table(db_engine)
        self.Session = sessionmaker(bind=db_engine)
Exemple #42
0
 def __init__(self, import_hours=False):
     """Initialize the session factory and remember the ``import_hours`` flag.

     Args:
         import_hours: If True, import business hours into the hours table
             in the database. Stored on ``self.import_hours``.
     """
     self.Session = sessionmaker(bind=db_connect())
     self.import_hours = import_hours
    def __init__(self):
        """Initialize the database connection and the session factory.

        Calls ``create_housetripdeals_table`` with the engine before binding
        ``self.Session``.
        """
        db_engine = db_connect()
        create_housetripdeals_table(db_engine)
        self.Session = sessionmaker(bind=db_engine)
 def __init__(self):
     """Initialize the database connection and the session factory.

     Calls ``create_players_table`` with the engine before binding
     ``self.Session``.
     """
     db_engine = db_connect()
     create_players_table(db_engine)
     self.Session = sessionmaker(bind=db_engine)
Exemple #45
0
 def __init__(self):
     """Initialize the database connection and the session factory.

     Calls ``create_jackets_table`` with the engine before binding
     ``self.Session``.
     """
     db_engine = db_connect()
     create_jackets_table(db_engine)
     self.Session = sessionmaker(bind=db_engine)
Exemple #46
0
 def __init__(self):
     """Initialize the file registry, database connection and session factory.

     ``self.files`` starts as an empty dict. The engine is passed to
     ``create_deals_table`` and then used to bind ``self.Session``.
     """
     self.files = {}
     db_engine = db_connect()
     create_deals_table(db_engine)
     self.Session = sessionmaker(bind=db_engine)
Exemple #47
0
    def __init__(self):
        """Initialize the database connection and the session factory.

        The engine is passed to ``create_answers_table`` and then used to
        bind ``self.Session``.
        """
        db_engine = db_connect()
        create_answers_table(db_engine)
        self.Session = sessionmaker(bind=db_engine)
def reset():
	"""Drop and recreate the playlist, track and artist tables.

	Tables are dropped in the listed order (skipping any that do not exist)
	and recreated in the opposite order.
	"""
	db_engine = db_connect()

	drop_order = [PlaylistTrack, TrackArtist, Track, Artist]
	create_order = drop_order[::-1]

	for model in drop_order:
		model.__table__.drop(db_engine, checkfirst=True)

	for model in create_order:
		model.__table__.create(db_engine)
Exemple #49
0
	def __init__(self):
		"""Initialize the database connection and the session factory.

		Calls, in order, ``create_job_post_table``, ``create_keyword_table``
		and ``create_job_keyword_table`` with the engine, then binds
		``self.Session``.
		"""
		db_engine = db_connect()
		for create in (create_job_post_table,
				create_keyword_table,
				create_job_keyword_table):
			create(db_engine)
		self.Session = sessionmaker(bind=db_engine)
def process_item(start=0,numEntries=10):
    """Return absolute Flipkart URLs for one page of ``flipkartdata`` rows.

    Args:
        start: row offset of the page; converted with ``int()``.
        numEntries: maximum number of rows; converted with ``int()``.

    Returns:
        list of strings: ``"http://www.flipkart.com"`` followed by each
        row's ``apparelURL``.

    Raises:
        ValueError/TypeError: if either argument cannot be converted to int.
    """
    engine = db_connect()
    database = "flipkartdata"
    # int() guarantees only integers are spliced into the SQL text.
    offset = int(start)
    stepSize = int(numEntries)
    query = "select \"id\",\"apparelURL\" from " + database + " limit " + str(stepSize) + " offset " + str(offset)
    queryRes = engine.execute(query)
    return ["http://www.flipkart.com" + element[1] for element in queryRes.fetchall()]
 def __init__(self):
     """Initialize the database connection, session factory and zip lookup.

     Calls ``create_deals_table`` with the engine, binds ``self.Session`` and
     stores the result of ``self.createZip()`` in the module-level
     ``zipDict``.
     """
     # Must come before the assignment; the old order is a SyntaxError on
     # Python 3 and a SyntaxWarning on Python 2.
     global zipDict
     engine = db_connect()
     create_deals_table(engine)
     self.Session = sessionmaker(bind=engine)
     zipDict = self.createZip()
 def __init__(self):
     """Initialize the database connection, session factory and zip lookup.

     The engine is passed to ``create_deals_table`` and bound to
     ``self.Session``; the module-level ``zipDict`` is then set from
     ``self.createZip()``.
     """
     # Declared first: declaring a name global after assigning to it is
     # rejected by Python 3 and only warned about by Python 2.
     global zipDict
     engine = db_connect()
     create_deals_table(engine)
     self.Session = sessionmaker(bind=engine)
     zipDict = self.createZip()
Exemple #53
0
    def test_save_item(self):
        """Check the sample project exists, save it again, then delete it."""
        proyectos = db_connect()['pdl_proyecto']

        # database should have it
        # NOTE(review): this assertion runs before save_item() - confirm the
        # row is expected to exist beforehand.
        stored = proyectos.find_one(codigo=self.item['codigo'])
        self.assertIsNotNone(stored)

        self.pipeline.save_item(self.item)

        # delete item
        proyectos.delete(codigo=self.item['codigo'])
Exemple #54
0
    def __init__(self):
        """Initialize the class.

        Opens the engine, calls ``create_table`` and stores a
        ``scoped_session`` on ``self.Session``. Also sets ``self.number`` to
        1 and creates ``self.print_lock``.
        """
        db_engine = db_connect()
        create_table(db_engine)
        self.Session = scoped_session(sessionmaker(bind=db_engine))
        self.number = 1
        self.print_lock = threading.Lock()
Exemple #55
0
    def save_item(self, item):
        """Insert ``item`` into ``visitors_visitor`` unless its sha1 is stored.

        New items get ``created`` and ``modified`` timestamps before the
        insert. Both outcomes are logged at INFO level.

        Args:
            item: mutable mapping with at least ``'sha1'`` and ``'date'``.
        """
        visitors = db_connect()['visitors_visitor']

        if visitors.find_one(sha1=item['sha1']) is not None:
            logging.info("{0}, date: {1} is found in db, not saving".format(item['sha1'], item['date']))
            return

        item['created'] = datetime.datetime.now()
        item['modified'] = datetime.datetime.now()
        visitors.insert(item)
        logging.info("Saving: {0}, date: {1}".format(item['sha1'], item['date']))
    def save_iniciativas(self, item):
        """
        Try to save a list of tuples to Seguimientos model if they don't
        exist already.
        """
        log.msg("Try to save iniciativas.")
        db = db_connect()

        # get proyect id for these seguimientos
        table = db['pdl_proyecto']
        table.update(item, ['codigo'])