def main(argv=None):
    """Parse CLI options, load the model, and launch the scoring run."""
    args = docopt.docopt(__doc__, argv=argv)

    log_level = logging.DEBUG if args['--debug'] else logging.INFO
    logging.basicConfig(
        level=log_level,
        format='%(asctime)s %(levelname)s:%(name)s -- %(message)s'
    )

    # Merge any per-class weight overrides into the module-level table.
    if args['--class-weight'] is not None:
        global CLASS_WEIGHTS
        overrides = dict(
            _parse_class_weight_option(option)
            for option in args['--class-weight']
        )
        CLASS_WEIGHTS.update(overrides)

    paths = args['<dump-file>']

    with open(args['--model']) as model_file:
        model = Model.load(model_file)

    sunset = mwtypes.Timestamp(args['--sunset'])

    # Reject unknown --score-at values before opening any outputs.
    score_at = args['--score-at']
    if score_at not in SCORE_ATS:
        raise ValueError("--score-at value {0} not available in {1}"
                         .format(score_at, SCORE_ATS))

    if args['--rev-scores'] == "<stdout>":
        rev_scores = mysqltsv.Writer(sys.stdout, headers=HEADERS)
    else:
        rev_scores = mysqltsv.Writer(
            open(args['--rev-scores'], "w"), headers=HEADERS)

    # When extending a previous run, remember the last-scored timestamp
    # per page so already-covered revisions can be skipped.
    skip_scores_before = {}
    if args['--extend'] is not None:
        logger.info("Reading in past scores from {0}".format(args['--extend']))
        old_rows = mysqltsv.read(
            open(args['--extend']),
            types=[int, str, int, mwtypes.Timestamp, str, float])
        for old_row in old_rows:
            skip_scores_before[old_row.page_id] = old_row.timestamp
        logger.info("Completed reading scores from old output.")

    if args['--processes'] == "<cpu count>":
        processes = cpu_count()
    else:
        processes = int(args['--processes'])

    verbose = args['--verbose']

    run(paths, model, sunset, score_at, rev_scores, skip_scores_before,
        processes, verbose=verbose)
def main(argv=None):
    """Open the building/testing TSV inputs and the output, then run.

    `argv` lets callers (and tests) pass an argument list explicitly;
    `None` falls back to `sys.argv`.
    """
    # BUG FIX: `argv` was previously ignored — docopt always parsed
    # sys.argv even when an explicit argument list was supplied.
    args = docopt.docopt(__doc__, argv=argv)
    logging.basicConfig(
        level=logging.INFO if not args['--debug'] else logging.DEBUG,
        format='%(asctime)s %(levelname)s:%(name)s -- %(message)s')
    # All three inputs are headerless, 5-string-column TSV files.
    input_building_data_file = mysqltsv.Reader(
        open(args['<input_building_data>'], 'rt'), headers=False,
        types=[str, str, str, str, str])
    input_i1_testing_data_file = mysqltsv.Reader(
        open(args['<input_i1_testing_data>'], 'rt'), headers=False,
        types=[str, str, str, str, str])
    input_i2_testing_data_file = mysqltsv.Reader(
        open(args['<input_i2_testing_data>'], 'rt'), headers=False,
        types=[str, str, str, str, str])
    output_file = mysqltsv.Writer(open(args['<output>'], "w"))
    verbose = args['--verbose']
    run(input_building_data_file, input_i1_testing_data_file,
        input_i2_testing_data_file, output_file, verbose)
def main(argv=None):
    """Open the session-annotated input (twice) and the output, then run.

    `argv` lets callers pass an argument list explicitly; `None` falls
    back to `sys.argv`.
    """
    # BUG FIX: `argv` was previously ignored — docopt always parsed
    # sys.argv even when an explicit argument list was supplied.
    args = docopt.docopt(__doc__, argv=argv)
    logging.basicConfig(
        level=logging.INFO if not args['--debug'] else logging.DEBUG,
        format='%(asctime)s %(levelname)s:%(name)s -- %(message)s')
    # Shared 13-column schema. The same file is opened twice so run()
    # gets two independent handles — presumably for a second pass over
    # the data (confirm in run()).
    input_types = [str, str, str, str, str, int, str, str, str, str, str,
                   str, str]
    input_file = mysqltsv.Reader(
        open(args['<input>'], 'rt'), headers=True, types=list(input_types))
    input_file_used_in_second_iteration = mysqltsv.Reader(
        open(args['<input>'], 'rt'), headers=True, types=list(input_types))
    output_file = mysqltsv.Writer(open(args['<output>'], "w"), headers=[
        'title', 'rev_id', 'user', 'username', 'comment', 'namespace',
        'timestamp', 'prev_timestamp', 'session_start', 'session_end',
        'session_index', 'session_events', 'event_index'
    ])
    verbose = args['--verbose']
    run(input_file, input_file_used_in_second_iteration, output_file,
        verbose)
def main(argv=None):
    """Open the session input TSV and the feature output TSV, then run.

    `argv` lets callers pass an argument list explicitly; `None` falls
    back to `sys.argv`.
    """
    # BUG FIX: `argv` was previously ignored — docopt always parsed
    # sys.argv even when an explicit argument list was supplied.
    args = docopt.docopt(__doc__, argv=argv)
    logging.basicConfig(
        level=logging.INFO if not args['--debug'] else logging.DEBUG,
        format='%(asctime)s %(levelname)s:%(name)s -- %(message)s')
    # 14-column input: 5 strings, a namespace int, then 8 more strings.
    input_file = mysqltsv.Reader(
        open(args['<input>'], 'rt'), headers=True,
        types=[str, str, str, str, str, int, str, str, str, str, str, str,
               str, str])
    # NOTE(review): 'disinct_edit_kinds' is misspelled but deliberately
    # kept — it is a runtime column name that downstream consumers of
    # these TSVs already depend on.
    output_file = mysqltsv.Writer(
        open(args['<output>'], "w"), headers=[
            'user', 'username', 'session_start', 'mean_in_seconds',
            'std_in_seconds', 'namespace_0_edits', 'namespace_1_edits',
            'namespace_2_edits', 'namespace_3_edits', 'namespace_4_edits',
            'namespace_5_edits', 'namespace_120_edits',
            'namespace_121_edits', 'edits', 'bot', 'human',
            'session_length_in_seconds', 'inter_edits_less_than_5_seconds',
            'inter_edits_between_5_and_20_seconds',
            'inter_edits_greater_than_20_seconds', 'claims',
            'distinct_claims', 'distinct_pages', 'disinct_edit_kinds',
            'generic_bot_comment', 'bot_revision_comment',
            'sitelink_changes', 'alias_changed', 'label_changed',
            'description_changed', 'edit_war',
            'inter_edits_less_than_2_seconds', 'things_removed',
            'things_modified'
        ])
    verbose = args['--verbose']
    run(input_file, output_file, verbose)
def run(observations, output, features, label_name, model, additional_fields,
        verbose):
    """Solve features for each observation and write one TSV row apiece.

    When a model is given, a serialized score document column is added.
    Any per-observation failure is printed to stderr and skipped.
    """
    column_names = [str(feature) for feature in features]
    column_names.append(label_name)
    if model is not None:
        column_names.append("score_doc")
    column_names.extend(additional_fields)
    writer = mysqltsv.Writer(output, headers=column_names)

    for observation in observations:
        try:
            feature_values = list(solve(features, cache=observation['cache']))
            row = list(feature_values)
            row.append(observation[label_name])
            if model is not None:
                # Score the solved feature vector and serialize the doc.
                row.append(json.dumps(model.score(feature_values)))
            row.extend(observation[field_name]
                       for field_name in additional_fields)
            writer.write(row)
            if verbose:
                sys.stderr.write(".")
                sys.stderr.flush()
        except:  # noqa: E722
            # Naughty indiscriminate exception consumption.
            sys.stderr.write(traceback.format_exc())

    if verbose:
        sys.stderr.write("\n")
def run(dump_files, extractors):
    """Stream extracted citations from the dump files to stdout as TSV."""
    writer = mysqltsv.Writer(sys.stdout, headers=HEADERS)
    for page_id, title, rev_id, timestamp, cite_type, cite_id in \
            extract(dump_files, extractors=extractors):
        writer.write([page_id, title, rev_id, timestamp.long_format(),
                      cite_type, cite_id])
def main(argv=None):
    """Parse arguments and kick off revert labeling over dump files."""
    args = docopt.docopt(__doc__, argv=argv)
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s %(levelname)s:%(name)s -- %(message)s'
    )

    revert_radius = int(args['--revert-radius'])
    # The window option is given in hours; convert to seconds.
    revert_window = int(args['--revert-window']) * 60 * 60

    session = None
    if args['--host']:
        session = mwapi.Session(args['--host'],
                                user_agent="ORES revert labeling utility")

    dumps = args['<dump-file>']
    verbose = args['--verbose']

    # Empty/missing bounds pass through untouched.
    start = Timestamp(args['--start']) if args['--start'] else args['--start']
    end = Timestamp(args['--end']) if args['--end'] else args['--end']

    reverted_only = args['--reverted-only']

    trusted_users = None
    trusted_groups = args['--trusted-groups']
    if trusted_groups:
        trusted_groups = trusted_groups.split(',')
        trusted_users = load_user_group_members(trusted_groups, session)

    trusted_edits = args['--trusted-edits']
    if trusted_edits:
        trusted_edits = int(trusted_edits)

    if args['--rev-reverteds'] == "<stdout>":
        rev_reverteds = mysqltsv.Writer(sys.stdout)
    else:
        rev_reverteds = mysqltsv.Writer(open(args['--rev-reverteds'], "w"))

    check_blocked = args['--check-blocked']

    run(dumps, session, start, end, revert_radius, revert_window,
        reverted_only, trusted_users, trusted_edits, rev_reverteds,
        check_blocked, verbose=verbose)
def run(ores_url, context, model, revs):
    """Append an ORES probability column to each revision row on stdout."""
    writer = mysqltsv.Writer(sys.stdout, headers=revs.headers + ['proba'])
    for batch in batches(revs):
        batch_ids = [row.rev_id for row in batch]
        # One scoring request per batch.
        scores = get_probas(ores_url, context, model, batch_ids)
        for row, score in zip(batch, scores):
            out_row = list(row)
            out_row.append(score)
            writer.write(out_row)
def run(paths, rate):
    """Sample tokens from each path in parallel and write rows to stdout."""
    writer = mysqltsv.Writer(sys.stdout, headers=HEADERS)

    def sample_from(path):
        # Each input line is a JSON document; decode lazily.
        lines = mwcli.files.reader(path)
        docs = (json.loads(line) for line in lines)
        return sample_tokens(docs, rate)

    for row in para.map(sample_from, paths):
        writer.write(row)
def main(argv=None):
    """Wire up readers/writers from CLI options and run sessionization."""
    args = docopt.docopt(__doc__, argv=argv)

    if args['<source>']:
        sources = []
        for path in args['<source>']:
            reader = mysqltsv.Reader(open(path, errors='replace'),
                                     error_handler=log_error)
            sources.append(reader)
    else:
        # No paths given: read one stream from stdin, replacing any
        # undecodable bytes rather than crashing.
        stdin_text = io.TextIOWrapper(sys.stdin.buffer, encoding='utf-8',
                                      errors='replace')
        sources = [mysqltsv.Reader(stdin_text, error_handler=log_error)]

    user_cols = args['--user']
    timestamp_col = args['--timestamp']
    cutoff = float(args['--cutoff'])

    session_headers = user_cols + SESSION_SUFFIX
    if args['--sessions'] == "<stdout>":
        session_writer = mysqltsv.Writer(sys.stdout, headers=session_headers)
    else:
        session_writer = mysqltsv.Writer(open(args['--sessions'], 'w'),
                                         headers=session_headers)

    event_writer = None
    if args['--events'] is not None:
        event_writer = mysqltsv.Writer(
            open(args['--events'], 'w'),
            headers=sources[0].headers + EVENT_SUFFIX)

    verbose = args['--verbose']
    debug = args['--debug']

    run(sources, cutoff, session_writer, event_writer, user_cols,
        timestamp_col, verbose, debug)
def main(argv=None):
    """Open the revisions output and run extraction over the inputs.

    `argv` lets callers pass an argument list explicitly; `None` falls
    back to `sys.argv`.
    """
    # BUG FIX: `argv` was previously ignored — docopt always parsed
    # sys.argv even when an explicit argument list was supplied.
    args = docopt.docopt(__doc__, argv=argv)
    logging.basicConfig(
        level=logging.INFO if not args['--debug'] else logging.DEBUG,
        format='%(asctime)s %(levelname)s:%(name)s -- %(message)s'
    )
    input_files = args['<input>']
    revisions_output_file = \
        mysqltsv.Writer(open(args['--revisions-output'], "w"))
    verbose = args['--verbose']
    run(input_files, revisions_output_file, verbose)
def run(rev_ids, score_processor, cache, verbose, debug):
    """Score revisions and emit (rev_id, true_proba) rows to stdout.

    Progress goes to stderr: "e" for an error score document, "." for a
    successful one, and the raw JSON for anything unrecognized.
    """
    writer = mysqltsv.Writer(sys.stdout, headers=['rev_id', 'true_proba'])
    for rev_id, score_doc in score_processor.score(rev_ids, cache=cache):
        if 'type' in score_doc:
            # An error document came back for this revision.
            sys.stderr.write("e")
        elif 'probability' in score_doc:
            writer.write([rev_id, score_doc['probability'][True]])
            sys.stderr.write(".")
        else:
            sys.stderr.write(json.dumps(score_doc))
        sys.stderr.flush()
    sys.stderr.write("\n")
def run(observations, features, label_name, verbose):
    """Solve each observation's features and print TSV rows to stdout.

    Any per-observation failure is printed to stderr and skipped.
    """
    header_row = [str(feature) for feature in features]
    header_row.append(label_name)
    writer = mysqltsv.Writer(sys.stdout, headers=header_row)
    for observation in observations:
        try:
            values = list(solve(features, cache=observation['cache']))
            values.append(observation[label_name])
            writer.write(values)
            if verbose:
                sys.stderr.write(".")
                sys.stderr.flush()
        except:  # noqa: E722
            # Naughty indiscriminate exception consumption.
            sys.stderr.write(traceback.format_exc())
    if verbose:
        sys.stderr.write("\n")
def main():
    """Aggregate revert counts per (month, namespace) from stdin JSON.

    Reads one revert document per line from stdin and writes monthly
    per-namespace totals of reverts, bot reverts, bot reverteds, and
    bot-to-bot reverts as TSV on stdout.
    """
    args = docopt.docopt(__doc__)
    HEADINGS = [
        "month", "page_namespace", "reverts", "bot_reverts", "bot_reverteds",
        "bot2bot_reverts"
    ]
    if args['--bots']:
        # One bot username per line, whitespace-trimmed.
        bots = {u.strip() for u in open(args['--bots'])}
    else:
        # BUG FIX: this used to be None, which made the `in bots`
        # membership tests below raise TypeError on the first document.
        # An empty set preserves the intent: nothing counts as a bot.
        bots = set()
    logging.basicConfig(
        level=logging.WARNING,
        format='%(asctime)s %(levelname)s:%(name)s -- %(message)s')
    writer = mysqltsv.Writer(sys.stdout, headers=HEADINGS)
    # month -> namespace -> counter name -> count
    nmc = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
    for doc in read_json_lines(sys.stdin):
        reverted_username = doc['reverteds'][-1].get('user', {}).get('text')
        reverting_username = doc['reverting'].get('user', {}).get('text')
        # Skip self-reverts.
        if reverted_username == reverting_username:
            continue
        dbts = Timestamp(doc['reverting']['timestamp']).short_format()
        month = dbts[:6] + "01"  # bucket by month (first of the month)
        namespace = doc['reverting']['page']['namespace']
        nmc[month][namespace]['reverts'] += 1
        # Booleans add as 0/1.
        nmc[month][namespace]['bot_reverts'] += reverting_username in bots
        nmc[month][namespace]['bot_reverteds'] += reverted_username in bots
        nmc[month][namespace]['bot2bot_reverts'] += (
            reverting_username in bots and reverted_username in bots)
    for month in sorted(nmc.keys()):
        for page_namespace in sorted(nmc[month].keys()):
            counts = nmc[month][page_namespace]
            writer.write([
                month, page_namespace, counts['reverts'],
                counts['bot_reverts'], counts['bot_reverteds'],
                counts['bot2bot_reverts']
            ])
def run(models, obs, output, workers, verbose):
    """Score sentences with each model across a process pool, writing TSV."""
    model_names = [name for name, _ in models]
    writer = mysqltsv.Writer(
        output,
        headers=(['rev_id', 'i', 'sentence', 'length', 'productions'] +
                 model_names))
    scorer = SentenceExtractorScorer(models)
    with ProcessPoolExecutor(max_workers=workers) as pool:
        logger.info("Processing sentences...")
        for error, scored_rows in pool.map(scorer.score, obs):
            if error is not None:
                sys.stderr.write(str(error))
                sys.stderr.write('\n')
                continue
            for rev_id, i, sentence, length, scores in scored_rows:
                row = [rev_id, i, sentence, length,
                       scores[0]['productions']]
                row.extend(score['log_proba'] for score in scores)
                writer.write(row)
                if verbose:
                    sys.stderr.write(".")
                    sys.stderr.flush()
    if verbose:
        sys.stderr.flush()
process_language(detect( wikicode.strip_code().strip()))) except: pass languages = list(set(languages)) detected_languages = list(set(detected_languages)) yield page.id, wikitext_length, has_infobox, has_description_field, str( languages), str(detected_languages) print("total files: " + str(number_of_files)) output = mysqltsv.Writer( open("data/sdoc/commonswiki_20171120_files_description.tsv", "w"), headers=[ "page_id", "wikitext_length", "has_infobox", "has_description_field", "languages", "detected_languages" ]) for page_id, wikitext_length, has_infobox, has_description_field, languages, detected_languages in mwxml.map( process_dump, dump_files): output.write([ page_id, wikitext_length, has_infobox, has_description_field, languages, detected_languages ]) # compress the tsv file # tar -czvf commonswiki_20171120_files_description.tar.gz commonswiki_20171120_files_description.tsv
import io
import sys

import mysqltsv

# Demonstrate writing: rows may be sequences or mappings, and a None
# value is emitted where the parallel input below shows NULL.
writer = mysqltsv.Writer(sys.stdout, headers=['user_id', 'user_text', 'edits'])
writer.write([10, 'Foobar_Barman', 2344])
writer.write({'user_text': 'Barfoo_Fooman', 'user_id': 11, 'edits': 20})
writer.write([None, "127.0.0.1", 42])

# Demonstrate reading the same shape of data back with typed columns.
tsv_text = ("user_id\tuser_text\tedits\n" +
            "10\tFoobar_Barman\t2344\n" +
            "11\tBarfoo_Fooman\t20\n" +
            "NULL\t127.0.0.1\t42\n")
reader = mysqltsv.Reader(io.StringIO(tsv_text), types=[int, str, int])
for row in reader:
    # Columns are accessible by attribute, by key, or by index.
    print(repr(row.user_id), repr(row['user_text']), repr(row[2]))
def main():
    """Convert revert JSON documents on stdin into TSV revision rows.

    Each stdin line is a JSON revert document; one output row pairs the
    last reverted revision with its reverting revision.  If --users is
    given, only reverts where both the reverted and the reverting editor
    appear in that list are emitted.
    """
    args = docopt.docopt(__doc__)
    # Output column order; must match the positional writer.write() below.
    HEADINGS = [
        "rev_id", "rev_timestamp", "rev_user", "rev_user_text", "rev_page",
        "rev_sha1", "rev_minor_edit", "rev_deleted", "rev_parent_id",
        "archived", "reverting_id", "reverting_timestamp", "reverting_user",
        "reverting_user_text", "reverting_page", "reverting_sha1",
        "reverting_minor_edit", "reverting_deleted", "reverting_parent_id",
        "reverting_archived", "reverting_comment", "rev_revert_offset",
        "revisions_reverted", "reverted_to_rev_id", "page_namespace"
    ]
    if args['--users']:
        # One username per line; whitespace-trimmed.
        users = {u.strip() for u in open(args['--users'])}
    else:
        users = None
    writer = mysqltsv.Writer(sys.stdout, headers=HEADINGS)
    for doc in (json.loads(l) for l in sys.stdin):
        reverted_username = doc['reverteds'][-1].get('user', {}).get('text')
        reverting_username = doc['reverting'].get('user', {}).get('text')
        # Skip self-reverts.
        if reverted_username == reverting_username:
            continue
        # With a user filter, both parties must be in the list.
        if users is not None and \
                not (reverted_username in users and
                     reverting_username in users):
            continue
        # NOTE(review): rev_revert_offset and revisions_reverted are both
        # len(doc['reverteds']) — confirm the offset is really meant to
        # equal the revert count.
        writer.write([
            doc['reverteds'][-1]['id'],  # rev_id
            Timestamp(doc['reverteds'][-1]
                      ['timestamp']).short_format(),  # rev_timestamp
            doc['reverteds'][-1].get('user', {}).get('id'),  # rev_user
            doc['reverteds'][-1].get('user', {}).get('text'),  # rev_user_text
            doc['reverteds'][-1]['page']['id'],  # rev_page
            doc['reverteds'][-1].get('sha1'),  # rev_sha1
            doc['reverteds'][-1]['minor'],  # rev_minor_edit
            doc['reverteds'][-1]['deleted']['text'],  # rev_deleted
            doc['reverteds'][-1].get('parent_id'),  # rev_parent_id
            False,  # archived
            doc['reverting']['id'],  # reverting_id
            Timestamp(doc['reverting']
                      ['timestamp']).short_format(),  # reverting_timestamp
            doc['reverting'].get('user', {}).get('id'),  # reverting_user
            doc['reverting'].get('user', {}).get('text'),  # reverting_user_text
            doc['reverting']['page']['id'],  # reverting_page
            doc['reverting'].get('sha1'),  # reverting_sha1
            doc['reverting']['minor'],  # reverting_minor_edit
            doc['reverting']['deleted']['text'],  # reverting_deleted
            doc['reverting'].get('parent_id'),  # reverting_parent_id
            False,  # reverting_archived
            doc['reverting'].get('comment', '-'),  # reverting_comment
            len(doc['reverteds']),  # rev_revert_offset
            len(doc['reverteds']),  # revisions_reverted
            doc['reverted_to']['id'],  # reverted_to_rev_id
            doc['reverting']['page']['namespace']  # page_namespace
        ])
        sys.stderr.write(".")
        sys.stderr.flush()
    sys.stderr.write("\n")
def main(argv=None):
    """Open testing/threshold-score inputs and sample outputs, then run.

    `argv` lets callers pass an argument list explicitly; `None` falls
    back to `sys.argv`.
    """
    # BUG FIX: `argv` was previously ignored — docopt always parsed
    # sys.argv even when an explicit argument list was supplied.
    args = docopt.docopt(__doc__, argv=argv)
    logging.basicConfig(
        level=logging.INFO if not args['--debug'] else logging.DEBUG,
        format='%(asctime)s %(levelname)s:%(name)s -- %(message)s')
    input_testing_file = mysqltsv.Reader(
        open(args['<input_testing>'], 'rt'), headers=True,
        types=[str, str, str, float, float, int, int, int, int, int, int,
               int, int, int, float, int, int, int, str, str, int])
    input_anonymous_user_threshold_scores_file = mysqltsv.Reader(
        open(args['<input_anonymous_user_threshold_scores>'], 'rt'),
        headers=True,
        types=[str, str, float, float, int, int, int, int, int, int, int,
               int, int, float, int, int, int, float])
    input_anonymous_user_threshold_scores_i2_file = mysqltsv.Reader(
        open(args['<input_anonymous_user_threshold_scores_i2>'], 'rt'),
        headers=True,
        types=[str, str, float, float, int, int, int, int, int, int, int,
               int, int, float, int, int, int, int, int, int, int, int,
               int, int, int, int, int, int, int, int, int, float])
    # All three sample writers share this human-readable header row
    # (previously repeated three times; each writer gets its own copy).
    sample_headers = [
        'session start timestamp', 'session completed timestamp', 'url',
        'Consistent revision frequency', 'Comment is "Updated item"',
        'Similar operations occur to different pages',
        'More than one claim edited per revision',
        'At least one rev. comment is prefixed by "bot" or "robot"',
        'Short session with rapid revisions', 'Not-obviously a bot']
    anonymous_user_samples_output_file = mysqltsv.Writer(
        open(args['<anonymous_user_samples_output>'], "w"),
        headers=list(sample_headers))
    anonymous_user_samples_i2_output_file = mysqltsv.Writer(
        open(args['<anonymous_user_samples_i2_output>'], "w"),
        headers=list(sample_headers))
    testing_samples_output_file = mysqltsv.Writer(
        open(args['<testing_samples_output>'], "w"),
        headers=list(sample_headers))
    verbose = args['--verbose']
    run(input_testing_file, input_anonymous_user_threshold_scores_file,
        input_anonymous_user_threshold_scores_i2_file,
        anonymous_user_samples_output_file,
        anonymous_user_samples_i2_output_file, testing_samples_output_file,
        verbose)
def main(argv=None):
    """Wire up training/testing readers and prediction writers, then run.

    `argv` lets callers pass an argument list explicitly; `None` falls
    back to `sys.argv`.
    """
    # BUG FIX: `argv` was previously ignored — docopt always parsed
    # sys.argv even when an explicit argument list was supplied.
    args = docopt.docopt(__doc__, argv=argv)
    logging.basicConfig(
        level=logging.INFO if not args['--debug'] else logging.DEBUG,
        format='%(asctime)s %(levelname)s:%(name)s -- %(message)s')

    # Training and testing inputs share one 34-column schema:
    # 3 str, 2 float, 9 int, 2 str, 1 float, 17 int.
    training_testing_types = (
        [str] * 3 + [float] * 2 + [int] * 9 + [str] * 2 + [float] +
        [int] * 17)
    input_training_file = mysqltsv.Reader(
        open(args['<input_training>'], 'rt'), headers=True,
        types=list(training_testing_types))
    input_testing_file = mysqltsv.Reader(
        open(args['<input_testing>'], 'rt'), headers=True,
        types=list(training_testing_types))
    input_anonymous_data_file = mysqltsv.Reader(
        open(args['<input_anonymous_data>'], 'rt'), headers=True,
        types=[str] * 2 + [float] * 2 + [int] * 9 + [float] + [int] * 17)

    # Base per-session feature columns shared by every output writer
    # (previously repeated verbatim for each writer).
    session_headers = [
        'username', 'session_start', 'mean_in_seconds', 'std_in_seconds',
        'namespace_0_edits', 'namespace_1_edits', 'namespace_2_edits',
        'namespace_3_edits', 'namespace_4_edits', 'namespace_5_edits',
        'namespace_120_edits', 'namespace_121_edits', 'edits',
        'session_length_in_seconds', 'inter_edits_less_than_5_seconds',
        'inter_edits_between_5_and_20_seconds',
        'inter_edits_greater_than_20_seconds']
    # Extra features present only in second-iteration (i2) outputs.
    # NOTE(review): 'disinct_edit_kinds' is misspelled but kept — it is a
    # runtime column name downstream files already depend on.
    i2_headers = [
        'claims', 'distinct_claims', 'distinct_pages', 'disinct_edit_kinds',
        'generic_bot_comment', 'bot_revision_comment', 'sitelink_changes',
        'alias_changed', 'label_changed', 'description_changed', 'edit_war',
        'inter_edits_less_than_2_seconds', 'things_removed',
        'things_modified']

    r_forest_predictions_output_file = mysqltsv.Writer(
        open(args['<r_forest_predictions_output>'], "w"),
        headers=session_headers + ['bot_prediction'])
    gradient_b_predictions_output_file = mysqltsv.Writer(
        open(args['<gradient_b_predictions_output>'], "w"),
        headers=session_headers + ['bot_prediction'])
    gradient_b_predictions_i2_output_file = mysqltsv.Writer(
        open(args['<gradient_b_predictions_i2_output>'], "w"),
        headers=session_headers + i2_headers + ['bot_prediction'])
    gradient_b_threshold_scores_output_file = mysqltsv.Writer(
        open(args['<gradient_b_threshold_scores_output>'], "w"),
        headers=session_headers + ['threshold_score'])
    gradient_b_threshold_scores_i2_output_file = mysqltsv.Writer(
        open(args['<gradient_b_threshold_scores_i2_output>'], "w"),
        headers=session_headers + i2_headers + ['threshold_score'])
    testing_output_file = mysqltsv.Writer(
        open(args['<testing_output>'], "w"),
        headers=['user'] + session_headers + ['bot', 'human',
                                              'bot_prediction'])
    pr_output_file = mysqltsv.Writer(open(args['<pr_output>'], "w"),
                                     headers=['precision', 'recall'])
    roc_output_file = mysqltsv.Writer(
        open(args['<roc_output>'], "w"),
        headers=['false_positives', 'true_positives'])

    verbose = args['--verbose']
    run(input_training_file, input_testing_file, input_anonymous_data_file,
        r_forest_predictions_output_file, gradient_b_predictions_output_file,
        gradient_b_predictions_i2_output_file,
        gradient_b_threshold_scores_output_file,
        gradient_b_threshold_scores_i2_output_file, testing_output_file,
        pr_output_file, roc_output_file, verbose)