def downloadCorpus(snapshotDir, corpusDir, projectName, configInfo, lang='java'):
    """Dump the corpus for one project.

    Builds a ``Corpus`` that reads from ``<snapshotDir>/<projectName>`` and
    writes to ``<corpusDir>/<projectName>``, then calls its ``dump()`` method.

    Args:
        snapshotDir: Parent directory; the project's snapshots are read from
            ``os.path.join(snapshotDir, projectName)``.
        corpusDir: Parent directory; the project's corpus location is
            ``os.path.join(corpusDir, projectName)``.
        projectName: Project name, used as the sub-directory name on both
            sides and in the progress banner.
        configInfo: Configuration object handed to ``Corpus`` unchanged.
        lang: Language handed to ``Corpus``. Defaults to ``'java'``, the value
            that used to be hard-coded, so existing callers are unaffected.
    """
    # 2. Dump the snapshots for a project
    rule = '---------------------------------------------------- \n'
    msg = rule
    msg += ' Dump the corpus for project %s \n' % projectName
    msg += rule
    # Single-argument print(...) emits the same text on Python 2 and 3.
    print(msg)

    project_snapshot_dir = os.path.join(snapshotDir, projectName)
    project_corpus_dir = os.path.join(corpusDir, projectName)

    if os.path.isdir(project_corpus_dir):
        print("!! %s already exists...returning \n" % project_corpus_dir)
        # NOTE(review): despite the message, the early return is disabled
        # here, so the dump below still runs when the directory exists.
        # return

    corpus = Corpus(project_snapshot_dir, lang, project_corpus_dir, configInfo)
    # logging.debug(corpus)
    corpus.dump()
# Driver: validate the command-line arguments, set up logging and
# configuration, then build and dump the corpus for the requested project.

# Refuse to continue without a real project directory. Exit with a non-zero
# status so that calling scripts can detect the failure.
if not os.path.isdir(args.proj_dir):
    print("!! Please provide a valid directory, given: %s" % (args.proj_dir))
    sys.exit(1)

# Single-argument print(...) emits the same text on Python 2 and 3.
print("Going to process project %s for %s" % (args.proj_dir, args.lang))
print("Creating output directory at %s" % (args.out_dir))

# NOTE(review): presumably clears a log file left over from a previous run;
# confirm against Util.cleanup.
Util.cleanup(args.log_file)

# Explicit output-directory creation is currently disabled:
# od = OutDir(args.out_dir)
# od.create_out_dir(args.out_dir)

Log.setLogger(args.verbose, args.log_file)

cfg = Config(args.config_file)

corpus = Corpus(args.proj_dir, args.lang, args.out_dir, cfg)
logging.debug(corpus)
corpus.dump()
# print corpus.printSnapshots()

# Planned follow-up steps, currently disabled:
# 1. Fetch one line changes of a given project for a given language from the database
# edits = fetchEdits(cfg, args.proj_dir, args.lang)

# 2. build the test corpus based on each edit
# createTrainingCorpus(args.proj_dir, args.lang, args.out_dir, edits)
# createTestCorpus(args.proj_dir, args.lang, od, edits)