예제 #1
0
def downloadCorpus(snapshotDir, corpusDir, projectName, configInfo):
    """Dump the Java corpus for a single project.

    Builds a ``Corpus`` for ``<snapshotDir>/<projectName>`` with
    ``<corpusDir>/<projectName>`` as its third argument, then calls
    ``dump()`` on it.

    Args:
        snapshotDir: Parent directory; the project's snapshot directory is
            ``snapshotDir/projectName``.
        corpusDir: Parent directory; the project's corpus directory is
            ``corpusDir/projectName``.
        projectName: Project name, used as the sub-directory name under both
            parent directories and in the progress banner.
        configInfo: Passed through unchanged as the last ``Corpus`` argument.
    """
    # Progress banner for this project.
    msg = '---------------------------------------------------- \n'
    msg += ' Dump the corpus for project %s \n' % projectName
    msg += '---------------------------------------------------- \n'
    print(msg)

    project_snapshot_dir = os.path.join(snapshotDir, projectName)
    project_corpus_dir = os.path.join(corpusDir, projectName)

    if os.path.isdir(project_corpus_dir):
        # Single-argument call form so the line is valid on both Python 2 and
        # Python 3, matching the print(msg) call above.
        # NOTE(review): the message says "returning", but the early return is
        # disabled, so execution continues and the corpus is dumped again for
        # the existing directory. Confirm which behaviour is intended before
        # changing either the message or the control flow.
        print("!! %s already exists...returning \n" % project_corpus_dir)

    corpus = Corpus(project_snapshot_dir, 'java', project_corpus_dir,
                    configInfo)
    corpus.dump()
예제 #2
0
파일: run.py 프로젝트: saheel1115/szz
    # Abort early when the given project path is not an existing directory.
    if not os.path.isdir(args.proj_dir):
        print "!! Please provide a valid directory, given: %s" % (
            args.proj_dir)
        # NOTE(review): sys.exit() without an argument exits with status 0,
        # so this error path reports success to the calling process.
        sys.exit()

    print "Going to process project %s for %s" % (args.proj_dir, args.lang)

    # Only announces the output location: the OutDir calls below are
    # commented out, so this fragment does not create the directory itself
    # (presumably Corpus does -- verify).
    print "Creating output directory at %s" % (args.out_dir)

    # presumably clears a log file left over from a previous run -- confirm
    # against Util.cleanup
    Util.cleanup(args.log_file)

    #od = OutDir(args.out_dir)
    #od.create_out_dir(args.out_dir)

    # Set up logging before the logging.debug call below.
    Log.setLogger(args.verbose, args.log_file)

    cfg = Config(args.config_file)

    # Build the corpus for the project/language pair, log it at debug level,
    # and dump it.
    corpus = Corpus(args.proj_dir, args.lang, args.out_dir, cfg)
    logging.debug(corpus)
    corpus.dump()

    # The remaining steps are disabled; they are kept as an outline of the
    # planned edit-fetching and corpus-building stages.
    #print corpus.printSnapshots()
    #1. Fetch one line changes of a given project for a given language from the database
    #edits = fetchEdits(cfg, args.proj_dir, args.lang)

    #2. build the test corpus based on each edit
    #createTrainingCorpus(args.proj_dir, args.lang, args.out_dir, edits)
    #createTestCorpus(args.proj_dir, args.lang, od, edits)