def do_rephrase_file(fn):
    '''
    Rephrase every line of the tracking log file fn, then replace fn in place.

    Each line read from fn is passed through do_rephrase_line and written to a
    temporary file named "tmp-<basename>" in the same directory.  Once every
    line has been written, the original file is renamed to "old-<basename>"
    and the temporary file is renamed to the original name.

    fn -- path of the tracking log file to rewrite (anything path() accepts).

    If reading, rephrasing or writing raises, both file handles are closed and
    the exception propagates before either rename runs, so the original file
    keeps its name; the partially written "tmp-" file is left on disk.
    '''
    from load_course_sql import openfile  # only needed in this function

    fn = path(fn)

    # Single-argument parenthesised print behaves the same as the print statement.
    print("Rephrasing tracking log file %s" % fn)
    sys.stdout.flush()

    ofn = fn.dirname() / ("tmp-" + fn.basename())
    ofp = openfile(ofn, 'w')
    try:
        ifp = openfile(fn)
        try:
            for line in ifp:
                ofp.write(do_rephrase_line(line))
        finally:
            # Release the input handle explicitly instead of relying on
            # garbage collection.
            ifp.close()
    finally:
        # Always flush and close the output, even when a line fails, so no
        # handle is leaked.
        ofp.close()

    # Keep the original around as "old-<basename>" before moving the rewritten
    # file into its place.
    oldfilename = fn.dirname() / ("old-" + fn.basename())
    print(" --> Done; renaming %s -> %s" % (fn, oldfilename))
    os.rename(fn, oldfilename)
    print(" --> renaming %s -> %s" % (ofn, fn))
    os.rename(ofn, fn)
    sys.stdout.flush()
def getYoutubeDurations(dataset, bq_table_input, api_key, outputfilename,
                        schema, force_recompute):
    '''
    Add youtube durations to Video Axis rows using youtube id's, and write the
    rows out as JSON lines to a local path, to prep for google storage /
    bigquery upload.

    dataset         -- passed to findVideoLength; also printed in the
                       write-failure message
    bq_table_input  -- iterable of dict-like rows
    api_key         -- passed to findVideoLength
    outputfilename  -- local path, opened for writing with openfile
    schema          -- object providing keys(); only row keys found there are
                       kept, and it is passed to check_schema as the_ds
    force_recompute -- when true, each row's VIDEO_LENGTH entry is replaced by
                       the result of findVideoLength for the row's VIDEO_ID

    Raises KeyError when force_recompute is true and a row has no VIDEO_ID
    entry after filtering.  Exceptions from findVideoLength and check_schema
    propagate; the output file is closed in every case.  A failure while
    serializing or writing a single row is reported on stdout and that row is
    skipped.
    '''
    fp = openfile(outputfilename, 'w')
    try:
        for linecnt, row_dict in enumerate(bq_table_input, 1):
            # Fetch the schema's keys once per row rather than once per key.
            allowed_keys = schema.keys()

            # Pass through only the keys defined in the schema, in row order.
            verified_row = OrderedDict(
                (key, row_dict[key]) for key in row_dict if key in allowed_keys
            )

            # Recompute Video Length durations
            if force_recompute:
                verified_row[VIDEO_LENGTH] = findVideoLength(
                    dataset=dataset,
                    youtube_id=verified_row[VIDEO_ID],
                    api_key=api_key,
                )

            # Ensure schema type
            check_schema(linecnt, verified_row, the_ds=schema, coerce=True)

            # Best effort: report a row that cannot be written and carry on.
            try:
                fp.write(json.dumps(verified_row) + '\n')
            except Exception as err:
                # NOTE(review): "data=" is filled with the dataset argument,
                # not the row contents -- confirm that this is intended.
                print("Failed to write line %s! Error=%s, data=%s"
                      % (linecnt, str(err), dataset))
    finally:
        # Close the output even if a row raises part-way through.
        fp.close()
def getYoutubeDurations(dataset, bq_table_input, api_key, outputfilename,
                        schema, force_recompute):
    '''
    Add youtube durations to Video Axis rows using youtube id's, and write the
    rows out as JSON lines to a local path, to prep for google storage /
    bigquery upload.

    dataset         -- passed to findVideoLength; also printed in the
                       write-failure message
    bq_table_input  -- iterable of dict-like rows
    api_key         -- passed to findVideoLength
    outputfilename  -- local path, opened for writing with openfile
    schema          -- object providing keys(); only row keys found there are
                       kept, and it is passed to check_schema as the_ds
    force_recompute -- when true, each row's VIDEO_LENGTH entry is replaced by
                       the result of findVideoLength for the row's VIDEO_ID

    Raises KeyError when force_recompute is true and a row has no VIDEO_ID
    entry after filtering.  Exceptions from findVideoLength and check_schema
    propagate; the output file is closed in every case.  A failure while
    serializing or writing a single row is reported on stdout and that row is
    skipped.
    '''
    fp = openfile(outputfilename, 'w')
    try:
        for linecnt, row_dict in enumerate(bq_table_input, 1):
            # Fetch the schema's keys once per row rather than once per key.
            allowed_keys = schema.keys()

            # Pass through only the keys defined in the schema, in row order.
            verified_row = OrderedDict(
                (key, row_dict[key]) for key in row_dict if key in allowed_keys
            )

            # Recompute Video Length durations
            if force_recompute:
                verified_row[VIDEO_LENGTH] = findVideoLength(
                    dataset=dataset,
                    youtube_id=verified_row[VIDEO_ID],
                    api_key=api_key,
                )

            # Ensure schema type
            check_schema(linecnt, verified_row, the_ds=schema, coerce=True)

            # Best effort: report a row that cannot be written and carry on.
            try:
                fp.write(json.dumps(verified_row) + '\n')
            except Exception as err:
                # NOTE(review): "data=" is filled with the dataset argument,
                # not the row contents -- confirm that this is intended.
                print("Failed to write line %s! Error=%s, data=%s"
                      % (linecnt, str(err), dataset))
    finally:
        # Close the output even if a row raises part-way through.
        fp.close()