def processFile(filep):
    """Scan a line-delimited JSON file and collect the matching records.

    Each line of ``filep`` is decoded with ``ujson.loads``.  A record for
    which ``settings.CheckCondition(rec, settings.keywords)`` is truthy is
    passed to ``settings.DoSomething(rec, times)``; ``times`` is presumably
    filled in by that call (TODO confirm against ``settings``).  After the
    scan, ``[times]`` is handed to ``helpers.write_day_wise_to_file`` together
    with ``settings.OUTPUT_DIR``.

    Failures while reading and failures while writing are both logged and
    swallowed, so whatever was accumulated before a read error is still
    written out and returned.

    Args:
        filep: Path of the input file.  A path containing ``.gz`` is opened
            with ``gzip.open``; anything else with the built-in ``open``.

    Returns:
        The ``times`` dict passed to ``settings.DoSomething`` for every
        matching record.
    """
    import gzip
    import os

    from ujson import loads

    import helpers
    import log
    import settings

    times = {}
    logger = log.logger('olympics_samples_' + os.path.basename(filep))

    try:
        # NOTE(review): the substring test is kept as-is for compatibility;
        # it also matches '.gz' appearing in a directory name -- confirm
        # whether filep.endswith('.gz') is what was intended.
        opener = gzip.open if '.gz' in filep else open

        # The context manager guarantees the handle is closed even when a
        # line fails to parse; previously it was never closed at all.
        with opener(filep) as f:
            logger.log('finding all records with location for: ' + f.name)
            tot_lines = 0
            loc_lines = 0
            for line in f:
                rec = loads(line)
                tot_lines += 1
                if not settings.CheckCondition(rec, settings.keywords):
                    continue
                settings.DoSomething(rec, times)
                loc_lines += 1
                # Progress report every 1000 matching records.
                if loc_lines % 1000 == 0:
                    logger.log('Count:' + str(loc_lines) + '/' + str(tot_lines))
                    logger.log('Last sample : %s' % (rec['text']))
            ret = {'fname': f.name, 'tot_lines': tot_lines, 'loc_lines': loc_lines}
        # Final stats are only sent when the whole file was read successfully.
        logger.send_final_stats(ret)
    except Exception as e:
        logger.log('Error log: ' + str(e))

    # write results to file
    logger.log('Sending to files now..')
    try:
        helpers.write_day_wise_to_file([times], settings.OUTPUT_DIR)
    except Exception as e:
        logger.log('Error log: ' + str(e))
    logger.log('Done!')
    return times
def processFile(filep):
    """Scan a gzip-compressed, line-delimited JSON file for matching records.

    Each line of ``filep`` is decoded with ``ujson.loads``.  A record for
    which ``parallels.bdCheckCondition(rec)`` is truthy is passed to
    ``parallels.bdDoSomethingMemory(rec, locs)``; ``locs`` is presumably
    filled in by that call (TODO confirm against ``parallels``).  After the
    scan, ``[locs]`` is handed to ``helpers.write_day_wise_to_file`` together
    with ``settings.OUTPUT_DIR``.

    Args:
        filep: Path of the gzip-compressed input file.

    Returns:
        The ``locs`` dict passed to ``parallels.bdDoSomethingMemory`` for
        every matching record.

    Raises:
        Exception: Anything raised while opening, reading or decoding the
            input propagates to the caller (only the write step is guarded).
            The input handle is closed before the exception leaves.
    """
    import gzip
    import os

    from ujson import loads

    import helpers
    import log
    import parallels
    import settings

    locs = {}
    logger = log.logger('test/AllLocsBigData_' + os.path.basename(filep))

    # The context manager closes the handle on both the normal and the error
    # path; previously it was never closed.
    with gzip.open(filep) as f:
        logger.log('finding all records with location for: ' + f.name)
        tot_lines = 0
        loc_lines = 0
        for line in f:
            rec = loads(line)
            tot_lines += 1
            if not parallels.bdCheckCondition(rec):
                continue
            parallels.bdDoSomethingMemory(rec, locs)
            loc_lines += 1
            # Progress report every 10000 matching records.
            if loc_lines % 10000 == 0:
                logger.log('Count:' + str(loc_lines) + '/' + str(tot_lines))
        ret = {'fname': f.name, 'tot_lines': tot_lines, 'loc_lines': loc_lines}
    logger.send_final_stats(ret)

    # NOTE(review): looks like leftover debug output; kept so stdout is
    # unchanged.  The call form prints the same text on Python 2 and also
    # parses on Python 3.
    try:
        print('just trying')
    except Exception as e:
        logger.log('Error log: ' + str(e))

    # write results to file
    logger.log('Sending to files now..')
    try:
        helpers.write_day_wise_to_file([locs], settings.OUTPUT_DIR)
    except Exception as e:
        logger.log('Error log: ' + str(e))
    logger.log('Done!')
    return locs
def processFile(filep):
    """Scan a gzip-compressed, line-delimited JSON file for matching records.

    Each line of ``filep`` is decoded with ``ujson.loads``.  A record for
    which ``parallels.bdCheckCondition(rec)`` is truthy is passed to
    ``parallels.bdDoSomethingMemory(rec, locs)``; ``locs`` is presumably
    filled in by that call (TODO confirm against ``parallels``).  After the
    scan, ``[locs]`` is handed to ``helpers.write_day_wise_to_file`` together
    with ``settings.OUTPUT_DIR``.

    Args:
        filep: Path of the gzip-compressed input file.

    Returns:
        The ``locs`` dict passed to ``parallels.bdDoSomethingMemory`` for
        every matching record.

    Raises:
        Exception: Anything raised while opening, reading or decoding the
            input propagates to the caller (only the write step is guarded).
            The input handle is closed before the exception leaves.
    """
    import gzip
    import os

    from ujson import loads

    import helpers
    import log
    import parallels
    import settings

    locs = {}
    logger = log.logger('test/AllLocsBigData_' + os.path.basename(filep))

    # The context manager closes the handle on both the normal and the error
    # path; previously it was never closed.
    with gzip.open(filep) as f:
        logger.log('finding all records with location for: ' + f.name)
        tot_lines = 0
        loc_lines = 0
        for line in f:
            rec = loads(line)
            tot_lines += 1
            if not parallels.bdCheckCondition(rec):
                continue
            parallels.bdDoSomethingMemory(rec, locs)
            loc_lines += 1
            # Progress report every 10000 matching records.
            if loc_lines % 10000 == 0:
                logger.log('Count:' + str(loc_lines) + '/' + str(tot_lines))
        ret = {'fname': f.name, 'tot_lines': tot_lines, 'loc_lines': loc_lines}
    logger.send_final_stats(ret)

    # NOTE(review): looks like leftover debug output; kept so stdout is
    # unchanged.  The call form prints the same text on Python 2 and also
    # parses on Python 3.
    try:
        print('just trying')
    except Exception as e:
        logger.log('Error log: ' + str(e))

    # write results to file
    logger.log('Sending to files now..')
    try:
        helpers.write_day_wise_to_file([locs], settings.OUTPUT_DIR)
    except Exception as e:
        logger.log('Error log: ' + str(e))
    logger.log('Done!')
    return locs