def write_to_db(self):
    """
    Drain the output queue into the db.

    Items are fetched with ``self.outq.get`` until the "STOP" sentinel is
    returned, and are passed to ``insert_to_db`` under the 'impressions'
    name together with an identity transform. How often records are
    actually flushed is decided by ``insert_to_db``, not here.
    """
    def passthrough(record):
        # Queue items are handed over as-is.
        return record

    # Two-argument iter(): calls get() repeatedly, stops on "STOP".
    queued_records = iter(self.outq.get, "STOP")
    insert_to_db('impressions', queued_records, passthrough)
Esempio n. 2
0
def import_hour(dt):
    """
    Import one hour of hive output into the db.

    ``get_hour_from_hive`` is called with ``dt`` and a scratch filename
    derived from it (presumably it writes the tsv dump there -- confirm
    against that helper). The file's header line is skipped, every
    remaining line is converted by ``transform`` and streamed to
    ``insert_to_db`` under the 'impressions_hive' name, and the scratch
    file is removed afterwards.

    dt -- object exposing ``year``, ``month``, ``day`` and ``hour``
          (e.g. a datetime); the values are not zero-padded in the name.

    Raises OSError if the scratch file cannot be opened or removed.
    """
    filename = "{0}-{1}-{2}T{3}".format(dt.year, dt.month, dt.day, dt.hour)
    get_hour_from_hive(dt, filename)

    def transform(line):
        """Turn one tab-separated line into a params dict."""
        count, query_params, minute, spider = line.strip().split('\t')
        params = get_params(minute + ":00", query_params)
        # here the data is aggregated already; count stays the raw string
        params['count'] = count
        params['spider'] = (spider == 'true')
        return params

    # The with-block guarantees the handle is closed, including when
    # insert_to_db raises part-way through the file.
    with open(filename) as rows:
        rows.readline()  # tsv header
        insert_to_db('impressions_hive', rows, transform)

    # os.remove instead of shelling out to `rm`: no shell is spawned and a
    # failed removal raises instead of going unnoticed.
    os.remove(filename)
Esempio n. 3
0
def import_hour(dt):
    """
    Import one hour of hive output into the db.

    A scratch filename is derived from ``dt`` and passed, with ``dt``, to
    ``get_hour_from_hive`` (presumably that helper writes the tsv dump to
    it -- confirm there). After skipping the header line, each remaining
    line goes through ``transform`` and is streamed to ``insert_to_db``
    under the 'impressions_hive' name; the scratch file is then removed.

    dt -- object exposing ``year``, ``month``, ``day`` and ``hour``
          (e.g. a datetime); the values are not zero-padded in the name.

    Raises OSError if the scratch file cannot be opened or removed.
    """
    filename = "{0}-{1}-{2}T{3}".format(dt.year, dt.month, dt.day, dt.hour)
    get_hour_from_hive(dt, filename)

    def transform(line):
        """Turn one tab-separated line into a params dict."""
        count, query_params, minute, spider = line.strip().split('\t')
        params = get_params(minute + ":00", query_params)
        # here the data is aggregated already; count stays the raw string
        params['count'] = count
        params['spider'] = (spider == 'true')
        return params

    # Close the handle deterministically, even if insert_to_db raises
    # while consuming the file.
    with open(filename) as rows:
        rows.readline()  # tsv header
        insert_to_db('impressions_hive', rows, transform)

    # Remove the scratch file without spawning a shell; unlike `rm` via
    # os.system, a failure here surfaces as an exception.
    os.remove(filename)