import os
import sys
from path import path    # path.py library (older lowercase API); module-level imports assumed by this snippet
# do_rephrase_line is assumed to be defined elsewhere in the same module


def do_rephrase_file(fn):
    '''
    Rephrase lines in the file fn, and overwrite the original file when done.
    '''

    from load_course_sql import openfile    # only needed in this function

    fn = path(fn)

    print "Rephrasing tracking log file %s" % fn
    sys.stdout.flush()

    # write the rephrased output to a temporary file alongside the original
    ofn = fn.dirname() / ("tmp-" + fn.basename())
    ofp = openfile(ofn, 'w')

    for line in openfile(fn):
        newline = do_rephrase_line(line)
        ofp.write(newline)

    ofp.close()
    
    # keep the original as an "old-" backup, then move the rephrased file into place
    oldfilename = fn.dirname() / ("old-" + fn.basename())
    print "  --> Done; renaming %s -> %s" % (fn, oldfilename)
    os.rename(fn, oldfilename)
    print "  --> renaming %s -> %s" % (ofn, fn)
    os.rename(ofn, fn)
    sys.stdout.flush()
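
A minimal usage sketch (hypothetical file paths; it assumes do_rephrase_file and its helpers are importable from this module):

import glob

# Rephrase every gzipped tracking log in a hypothetical local directory.
# Each file is rewritten in place, with the original kept as an "old-" backup.
for logfile in glob.glob('tracking-logs/*.json.gz'):
    do_rephrase_file(logfile)
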
Example No. 3
import json
from collections import OrderedDict
# openfile, check_schema, findVideoLength, VIDEO_LENGTH, and VIDEO_ID are
# assumed to come from the surrounding module.


def getYoutubeDurations(dataset, bq_table_input, api_key, outputfilename,
                        schema, force_recompute):
    '''
    Add YouTube durations to the video-axis rows using their YouTube IDs, then
    write the result out to the specified local path to prepare for Google
    Storage / BigQuery upload.
    '''

    fp = openfile(outputfilename, 'w')
    linecnt = 0
    for row_dict in bq_table_input:
        
        linecnt += 1
        verified_row = OrderedDict()
        
        # Initial pass-through of keys in the current row:
        # only include keys defined in the schema
        for key in row_dict:
            if key in schema:
                verified_row[key] = row_dict[key]

        # Recompute video length durations via the YouTube API
        if force_recompute:
            verified_row[VIDEO_LENGTH] = findVideoLength(
                dataset=dataset,
                youtube_id=verified_row[VIDEO_ID],
                api_key=api_key)
        
        # Ensure schema type
        check_schema(linecnt, verified_row, the_ds=schema, coerce=True)
        
        try:
            fp.write(json.dumps(verified_row)+'\n')
        except Exception as err:
            print "Failed to write line %s!  Error=%s, data=%s" % (linecnt, str(err), dataset)
    
    fp.close()
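
A minimal usage sketch with made-up inputs; the row dicts and toy_schema below are illustrative only, and the real rows and schema normally come from a BigQuery video-axis table and the project's schema definitions:

rows = [
    {'video_id': 'abc123XYZ00', 'name': 'Week 1 intro', 'course_id': 'org/course/run'},
]
toy_schema = {'video_id': {'type': 'STRING'}, 'name': {'type': 'STRING'}}  # illustrative shape only

getYoutubeDurations(dataset='video_stats',            # hypothetical dataset name
                    bq_table_input=rows,
                    api_key='YOUR_YOUTUBE_API_KEY',
                    outputfilename='video_axis.json',
                    schema=toy_schema,
                    force_recompute=False)            # skip the YouTube lookup in this sketch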