def write_to_db(self):
    """Drain the output queue into the 'impressions' table.

    Items are pulled from ``self.outq`` one ``get()`` at a time until the
    "STOP" sentinel is received, and are handed to ``insert_to_db``
    unchanged. Any batching of the writes is up to ``insert_to_db``.
    """

    def passthrough(record):
        # Queue items are inserted as-is; no per-record conversion.
        return record

    # Two-argument iter(): call outq.get() repeatedly until it returns "STOP".
    queued_records = iter(self.outq.get, "STOP")
    insert_to_db('impressions', queued_records, passthrough)
def import_hour(dt):
    """Load one hour of Hive impression data into 'impressions_hive'.

    Calls ``get_hour_from_hive(dt, filename)`` and then reads the local
    file ``filename`` (presumably written by that call -- confirm against
    get_hour_from_hive). The first line is skipped as the TSV header and
    every remaining line is passed through ``transform`` by
    ``insert_to_db``. The file is closed and deleted afterwards, even when
    the insert raises.

    NOTE(review): import_hour is defined twice in this file with the same
    logic; if both are at module level the later definition wins.

    Args:
        dt: object exposing ``year``, ``month``, ``day`` and ``hour``
            attributes (e.g. a datetime). It selects the hour to fetch and
            names the scratch file.
    """
    # Components are not zero-padded, e.g. "2013-4-7T5".
    filename = "{0}-{1}-{2}T{3}".format(dt.year, dt.month, dt.day, dt.hour)
    get_hour_from_hive(dt, filename)

    def transform(line):
        """Turn one TSV row into the params dict handed to insert_to_db."""
        count, query_params, minute, spider = line.strip().split('\t')
        params = get_params(minute + ":00", query_params)
        # The data is already aggregated, so the row carries its own count.
        params['count'] = count
        params['spider'] = (spider == 'true')
        return params

    try:
        # The context manager closes the handle even if the insert fails.
        with open(filename) as rows:
            rows.readline()  # skip the TSV header
            insert_to_db('impressions_hive', rows, transform)
    finally:
        # Best-effort cleanup, like the previous `rm` shell call whose
        # failure was ignored; also avoids masking an in-flight exception.
        try:
            os.remove(filename)
        except OSError:
            pass
def import_hour(dt):
    """Load one hour of Hive impression data into 'impressions_hive'.

    Calls ``get_hour_from_hive(dt, filename)`` and then reads the local
    file ``filename`` (presumably written by that call -- confirm against
    get_hour_from_hive). The first line is skipped as the TSV header and
    every remaining line is passed through ``transform`` by
    ``insert_to_db``. The file is closed and deleted afterwards, even when
    the insert raises.

    NOTE(review): import_hour is defined twice in this file with the same
    logic; if both are at module level this later definition is the one
    that stays bound. Consider removing the duplicate.

    Args:
        dt: object exposing ``year``, ``month``, ``day`` and ``hour``
            attributes (e.g. a datetime). It selects the hour to fetch and
            names the scratch file.
    """
    # Components are not zero-padded, e.g. "2013-4-7T5".
    filename = "{0}-{1}-{2}T{3}".format(dt.year, dt.month, dt.day, dt.hour)
    get_hour_from_hive(dt, filename)

    def transform(line):
        """Turn one TSV row into the params dict handed to insert_to_db."""
        count, query_params, minute, spider = line.strip().split('\t')
        params = get_params(minute + ":00", query_params)
        # The data is already aggregated, so the row carries its own count.
        params['count'] = count
        params['spider'] = (spider == 'true')
        return params

    try:
        # The context manager closes the handle even if the insert fails.
        with open(filename) as rows:
            rows.readline()  # skip the TSV header
            insert_to_db('impressions_hive', rows, transform)
    finally:
        # Best-effort cleanup, like the previous `rm` shell call whose
        # failure was ignored; also avoids masking an in-flight exception.
        try:
            os.remove(filename)
        except OSError:
            pass