def parse(out, infile, modulestore):
    """Convert an edX tracking-log JSON input into relational output.

    Builds a JSONToRelation converter that reads from ``infile`` (wrapped
    in an InURI) and writes to an OutputFile at ``out + '/data'``, installs
    an EdXTrackLogJSONParser for the 'EdxTrackEvent' main table with
    database name 'Edx', and then runs the conversion.

    :param out: path prefix under which '/transform.log' and '/data'
        are created.
    :param infile: input location handed to InURI.
    :param modulestore: forwarded to the parser as ``moduleStore``.
    """
    # The log file used to be opened for writing and the handle was then
    # neither used nor closed. Keep the visible side effect (the file is
    # created or truncated) but release the descriptor immediately.
    with open(out + '/transform.log', 'w'):
        pass

    # NOTE(review): `format` is not bound inside this function. Unless the
    # enclosing module defines its own `format`, this passes the builtin
    # function -- confirm against the module's globals.
    outfile = OutputFile(out + '/data', format, options='wb')

    parser = JSONToRelation(InURI(infile),
                            outfile,
                            mainTableName='EdxTrackEvent')
    parser.setParser(EdXTrackLogJSONParser(parser,
                                           'EdxTrackEvent',
                                           dbName='Edx',
                                           moduleStore=modulestore))
    parser.convert()
def parse(out, infile, modulestore):
    """Run the edX track-log JSON-to-relation conversion for one input.

    Reads from ``infile`` through an InURI, writes through an OutputFile
    located at ``out + '/data'``, and uses an EdXTrackLogJSONParser bound
    to the 'EdxTrackEvent' main table (database name 'Edx').

    :param out: path prefix; '/transform.log' and '/data' are appended to it.
    :param infile: input location handed to InURI.
    :param modulestore: forwarded to the parser as ``moduleStore``.
    """
    # Previously the handle returned by open() was bound to an unused local
    # and never closed. The file is still created/truncated here, but the
    # descriptor is closed right away instead of leaking.
    with open(out + '/transform.log', 'w'):
        pass

    # NOTE(review): `format` is not defined in this function; if the module
    # has no global of that name this is the builtin `format` -- confirm.
    outfile = OutputFile(out + '/data', format, options='wb')

    parser = JSONToRelation(InURI(infile),
                            outfile,
                            mainTableName='EdxTrackEvent')
    track_log_parser = EdXTrackLogJSONParser(parser,
                                             'EdxTrackEvent',
                                             dbName='Edx',
                                             moduleStore=modulestore)
    parser.setParser(track_log_parser)
    parser.convert()
# Open the destination with 'wb' so that any SQL file already at
# outFullPath is overwritten:
outSQLFile = OutputFile(outFullPath, outputFormat, options='wb')
jsonConverter = JSONToRelation(InURI(args.inFilePath),
                               outSQLFile,
                               mainTableName='EdxTrackEvent',
                               logFile=logFile)
try:
    # Setting useDisplayNameCache to True prevents guaranteed
    # pulling of Modulestore from the backup -- an expensive
    # operation. Note that cronRefreshModulestore.sh will
    # cause the cache to be refreshed:
    jsonConverter.setParser(
        EdXTrackLogJSONParser(jsonConverter,
                              'EdxTrackEvent',
                              replaceTables=args.dropTables,
                              dbName='Edx',
                              useDisplayNameCache=True))
except Exception as e:
    # Record the failure in the log file. repr() replaces the backtick
    # form, which yields the same text but is a syntax error on Python 3.
    with open(logFile, 'w') as fd:
        fd.write(
            "In json2sql: could not create EdXTrackLogJSONParser; infile: %s; outfile: %s; logfile: %s (%s)"
            % (InURI(args.inFilePath), outSQLFile, logFile, repr(e)))
    # Try to delete the .sql file that was created when
    # the OutputFile instance was made in the JSONToRelation
    # instantiation statement above:
    try:
        outSQLFile.remove()
    except Exception:
        # Best-effort cleanup: a failed removal must not mask the
        # parser-creation error that is being reported.
        pass
    sys.exit(1)
outputFormat = OutputDisposition.OutputFormat.CSV elif args.targetFormat == 'sql_dump': outputFormat = OutputDisposition.OutputFormat.SQL_INSERT_STATEMENTS else: outputFormat = OutputDisposition.OutputFormat.SQL_INSERTS_AND_CSV outSQLFile = OutputFile(outFullPath, outputFormat, options='wb') # overwrite any sql file that's there jsonConverter = JSONToRelation(InURI(args.inFilePath), outSQLFile, mainTableName='EdxTrackEvent', logFile=logFile ) try: jsonConverter.setParser(EdXTrackLogJSONParser(jsonConverter, 'EdxTrackEvent', replaceTables=args.dropTables, dbName='Edx' )) except Exception as e: with open(logFile, 'w') as fd: fd.write("In json2sql: could not create EdXTrackLogJSONParser: %s" % `e`) # Try to delete the .sql file that was created when # the OutputFile instance was made in the JSONToRelation # instantiation statement above: try: outSQLFile.remove(); except Exception as e: pass sys.exit(1) jsonConverter.convert()
#!/usr/bin/env python
# Stream converter: reads JSON track-log input from stdin and emits SQL
# insert statements on stdout.

import sys
import os

# Search the sibling json_to_relation directory before anything that is
# already on sys.path, so the project imports below resolve there first.
source_dir = [os.path.join(os.path.dirname(os.path.abspath(__file__)),
                           "../json_to_relation/")] + sys.path
sys.path = source_dir

from json_to_relation import JSONToRelation
from output_disposition import OutputPipe, OutputDisposition
from input_source import InPipe
from edxTrackLogJSONParser import EdXTrackLogJSONParser

if __name__ == "__main__":
    # Input comes from stdin; output goes to stdout, formatted as SQL
    # dump (insert) statements.
    stdinSource = InPipe()
    stdoutSink = OutputPipe(OutputDisposition.OutputFormat.SQL_INSERT_STATEMENTS)

    jsonConverter = JSONToRelation(stdinSource,
                                   stdoutSink,
                                   mainTableName='EdxTrackEvent',
                                   logFile='/tmp/j2s.log')

    # Install the edX track-log parser on the converter, then run it.
    trackLogParser = EdXTrackLogJSONParser(jsonConverter,
                                           'EdxTrackEvent',
                                           replaceTables=True,
                                           dbName='test')
    jsonConverter.setParser(trackLogParser)
    jsonConverter.convert()
# Make sure the log directory exists. The test is for existence rather than
# writability: with the earlier os.access(logDir, os.W_OK) check, an
# existing but read-only directory led to os.makedirs() being called on a
# path that already exists, which raises a misleading "File exists" error.
if not os.path.isdir(logDir):
    os.makedirs(logDir)

# Log file name combines the input file's base name with fileStamp:
logFile = os.path.join(
    logDir,
    'j2s_%s_%s.log' % (os.path.basename(args.inFilePath), fileStamp))

# Disabled debugging output:
#    print('xpunge: %s' % args.dropTables)
#    print('verbose: %s' % args.verbose)
#    print('destDir: %s' % args.destDir)
#    print('in=FilePath: %s' % args.inFilePath)
#    print('outFullPath: %s' % outFullPath)
#    print('logFile: %s' % logFile)

# Create an instance of JSONToRelation, taking input from the given file
# and pumping output to the given output path:
jsonConverter = JSONToRelation(
    InURI(args.inFilePath),
    OutputFile(outFullPath,
               OutputDisposition.OutputFormat.SQL_INSERT_STATEMENTS,
               options='wb'),  # overwrite any sql file that's there
    mainTableName='EdxTrackEvent',
    logFile=logFile)
jsonConverter.setParser(
    EdXTrackLogJSONParser(jsonConverter,
                          'EdxTrackEvent',
                          replaceTables=args.dropTables,
                          dbName='Edx'))
jsonConverter.convert()
import sys
import os

# Build a new module search path whose first entry is the sibling
# json_to_relation directory, followed by the previous sys.path entries.
source_dir = [
    os.path.join(os.path.dirname(os.path.abspath(__file__)),
                 "../json_to_relation/"),
]
source_dir += sys.path
sys.path = source_dir

from json_to_relation import JSONToRelation
from output_disposition import OutputPipe, OutputDisposition
from input_source import InPipe
from edxTrackLogJSONParser import EdXTrackLogJSONParser

if __name__ == "__main__":
    # Wire stdin to stdout through a JSONToRelation converter whose
    # output format is SQL insert statements.
    sqlInsertFormat = OutputDisposition.OutputFormat.SQL_INSERT_STATEMENTS
    jsonConverter = JSONToRelation(
        InPipe(),
        OutputPipe(sqlInsertFormat),
        mainTableName='EdxTrackEvent',
        logFile='/tmp/j2s.log',
    )

    # Attach the edX track-log parser to the converter, then convert.
    jsonConverter.setParser(
        EdXTrackLogJSONParser(
            jsonConverter,
            'EdxTrackEvent',
            replaceTables=True,
            dbName='test',
        )
    )
    jsonConverter.convert()