default='data/joblib.dump') args, unknown = parser.parse_known_args() temp_csv = 'temp.csv' temp2_csv = 'temp2.csv' if args.start is None or args.stop is None: print('A pair of commit HASHs must be passed as input!') exit(-1) if not os.path.isfile(args.model): print('A valid trained model must be passed ad input argument!') exit(-1) # Get a list of touched methods in the last commit miner = Miner(args.repo, args.ext, temp_csv) miner.mine_methods(args.start, args.start) # Count the number of columns into which split dataset fin = open(temp_csv, mode='r') header = fin.readline() features = header.split(',') column_count = len(features) # Read remaining rows to extract files and methods allowed_methods = set() allowed_files = set() for line in fin.readlines(): cols = line.split(',') touched = int( cols[78] ) # Touched sum (this is a binary value since at this point only one commit is inspected)
'-bn', '--bic_name', type=str, help= 'The name of the column in the input CSV file used to identify the GIT HASH of a bic commit.', default='bic_commit') parser.add_argument( '-fn', '--fix_name', type=str, help= 'The name of the column in the input CSV file used to identify the GIT HASH of a bic commit.', default='git_hash') parser.add_argument('-o', '--output', type=str, help='Path of the CSV file where to save results.', default='data/method_metrics_geko-dev-2.csv') args, unknown = parser.parse_known_args() # Check that a valid repos is specified if args.repo is None or not os.path.isdir(args.repo): print('A valid path to a GIT repository must be specified!') exit(-1) bic_commits = get_bic_commits(args.bic, args.bic_name) fix_commits = get_fix_commits(args.fix, args.fix_name) miner = Miner(args.repo, args.ext, args.output, bic_commits, fix_commits) miner.mine_methods(args.start, args.stop) print("\n*** Extractor ended ***")