def main(argv=None):
    """Parse the command line, open the data files and delegate to ``run``.

    Args:
        argv: Optional list of command-line arguments passed on to docopt.
            With ``None`` (the default) docopt parses ``sys.argv[1:]``.

    The three inputs are opened as header-less ``mysqltsv`` readers with five
    string columns each; the output is a ``mysqltsv`` writer without headers.
    Every file opened here is closed when ``run`` returns or raises.
    """
    # Function-scope import: keeps this entry point self-contained.
    import contextlib

    # ``argv`` used to be accepted but ignored, so callers could not supply
    # their own argument list.
    args = docopt.docopt(__doc__, argv=argv)
    logging.basicConfig(
        level=logging.DEBUG if args['--debug'] else logging.INFO,
        format='%(asctime)s %(levelname)s:%(name)s -- %(message)s')

    with contextlib.ExitStack() as stack:

        def open_reader(option):
            # All three inputs share the same layout: no header row and
            # five columns read as plain strings.
            handle = stack.enter_context(open(args[option], 'rt'))
            return mysqltsv.Reader(handle, headers=False,
                                   types=[str, str, str, str, str])

        input_building_data_file = open_reader('<input_building_data>')
        input_i1_testing_data_file = open_reader('<input_i1_testing_data>')
        input_i2_testing_data_file = open_reader('<input_i2_testing_data>')

        output_file = mysqltsv.Writer(
            stack.enter_context(open(args['<output>'], "w")))

        verbose = args['--verbose']

        run(input_building_data_file, input_i1_testing_data_file,
            input_i2_testing_data_file, output_file, verbose)
def main(argv=None):
    """Parse the command line, open the input twice and delegate to ``run``.

    Args:
        argv: Optional list of command-line arguments passed on to docopt.
            With ``None`` (the default) docopt parses ``sys.argv[1:]``.

    The ``<input>`` path is opened by two independent readers, both handed
    to ``run`` (the second one is named for use "in second iteration").
    Every file opened here is closed when ``run`` returns or raises.
    """
    # Function-scope import: keeps this entry point self-contained.
    import contextlib

    # ``argv`` used to be accepted but ignored, so callers could not supply
    # their own argument list.
    args = docopt.docopt(__doc__, argv=argv)
    logging.basicConfig(
        level=logging.DEBUG if args['--debug'] else logging.INFO,
        format='%(asctime)s %(levelname)s:%(name)s -- %(message)s')

    with contextlib.ExitStack() as stack:

        def open_input():
            # Each call yields its own file handle and its own types list,
            # so the two readers never share state.
            handle = stack.enter_context(open(args['<input>'], 'rt'))
            return mysqltsv.Reader(
                handle,
                headers=True,
                types=[
                    str, str, str, str, str, int, str, str, str, str, str,
                    str, str
                ])

        input_file = open_input()
        input_file_used_in_second_iteration = open_input()

        output_file = mysqltsv.Writer(
            stack.enter_context(open(args['<output>'], "w")),
            headers=[
                'title', 'rev_id', 'user', 'username', 'comment',
                'namespace', 'timestamp', 'prev_timestamp', 'session_start',
                'session_end', 'session_index', 'session_events',
                'event_index'
            ])

        verbose = args['--verbose']

        run(input_file, input_file_used_in_second_iteration, output_file,
            verbose)
def main(argv=None):
    """Parse the command line, open input and output and delegate to ``run``.

    Args:
        argv: Optional list of command-line arguments passed on to docopt.
            With ``None`` (the default) docopt parses ``sys.argv[1:]``.

    The input is read with a header row and fourteen typed columns; the
    output writer is created with the per-session column headers listed
    below.  Both files are closed when ``run`` returns or raises.
    """
    # Function-scope import: keeps this entry point self-contained.
    import contextlib

    # ``argv`` used to be accepted but ignored, so callers could not supply
    # their own argument list.
    args = docopt.docopt(__doc__, argv=argv)
    logging.basicConfig(
        level=logging.DEBUG if args['--debug'] else logging.INFO,
        format='%(asctime)s %(levelname)s:%(name)s -- %(message)s')

    with contextlib.ExitStack() as stack:
        input_file = mysqltsv.Reader(
            stack.enter_context(open(args['<input>'], 'rt')),
            headers=True,
            types=[
                str, str, str, str, str, int, str, str, str, str, str, str,
                str, str
            ])

        # NOTE: 'disinct_edit_kinds' is kept exactly as spelled; it is an
        # emitted column name that other tools may read.
        output_file = mysqltsv.Writer(
            stack.enter_context(open(args['<output>'], "w")),
            headers=[
                'user', 'username', 'session_start', 'mean_in_seconds',
                'std_in_seconds', 'namespace_0_edits', 'namespace_1_edits',
                'namespace_2_edits', 'namespace_3_edits',
                'namespace_4_edits', 'namespace_5_edits',
                'namespace_120_edits', 'namespace_121_edits', 'edits', 'bot',
                'human', 'session_length_in_seconds',
                'inter_edits_less_than_5_seconds',
                'inter_edits_between_5_and_20_seconds',
                'inter_edits_greater_than_20_seconds', 'claims',
                'distinct_claims', 'distinct_pages', 'disinct_edit_kinds',
                'generic_bot_comment', 'bot_revision_comment',
                'sitelink_changes', 'alias_changed', 'label_changed',
                'description_changed', 'edit_war',
                'inter_edits_less_than_2_seconds', 'things_removed',
                'things_modified'
            ])

        verbose = args['--verbose']

        run(input_file, output_file, verbose)
def main():
    """Read the CLI options and pass them, with a stdin reader, to ``run``.

    The ``<ores>``, ``<context>`` and ``<model>`` options are forwarded to
    ``run`` in that order, followed by a ``mysqltsv.Reader`` over
    ``sys.stdin``.
    """
    options = docopt.docopt(__doc__)

    # Pull the three positional options out in the order ``run`` expects.
    ores_url, context, model = (
        options[key] for key in ('<ores>', '<context>', '<model>')
    )

    run(ores_url, context, model, mysqltsv.Reader(sys.stdin))
def main(argv=None):
    """Parse the command line, set up readers and writers, and call ``run``.

    Args:
        argv: Optional list of command-line arguments passed on to docopt.

    Sources are read from the ``<source>`` paths when any are given, and from
    standard input (decoded as UTF-8 with replacement of bad bytes)
    otherwise.  Sessions are written to standard output when ``--sessions``
    is the literal ``"<stdout>"``, else to that path.  Events are written
    only when ``--events`` is given.

    Files opened by this function are closed when ``run`` returns or raises;
    ``sys.stdin`` and ``sys.stdout`` are left untouched.
    """
    # Function-scope import: keeps this entry point self-contained.
    import contextlib

    args = docopt.docopt(__doc__, argv=argv)

    with contextlib.ExitStack() as stack:
        if args['<source>']:
            sources = [
                mysqltsv.Reader(
                    stack.enter_context(open(path, errors='replace')),
                    error_handler=log_error)
                for path in args['<source>']
            ]
        else:
            # stdin is only wrapped, not opened, here, so it is deliberately
            # not registered with the exit stack.
            input_stream = io.TextIOWrapper(sys.stdin.buffer,
                                            encoding='utf-8',
                                            errors='replace')
            sources = [mysqltsv.Reader(input_stream,
                                       error_handler=log_error)]

        user_cols = args['--user']
        timestamp_col = args['--timestamp']
        cutoff = float(args['--cutoff'])

        if args['--sessions'] == "<stdout>":
            session_stream = sys.stdout
        else:
            session_stream = stack.enter_context(
                open(args['--sessions'], 'w'))
        session_writer = mysqltsv.Writer(
            session_stream, headers=user_cols + SESSION_SUFFIX)

        if args['--events'] is not None:
            # Event rows carry the first source's columns plus the suffix.
            event_writer = mysqltsv.Writer(
                stack.enter_context(open(args['--events'], 'w')),
                headers=sources[0].headers + EVENT_SUFFIX)
        else:
            event_writer = None

        verbose = args['--verbose']
        debug = args['--debug']

        run(sources, cutoff, session_writer, event_writer, user_cols,
            timestamp_col, verbose, debug)
def main(argv=None):
    """Parse the command line, open all data files and delegate to ``run``.

    Args:
        argv: Optional list of command-line arguments passed on to docopt.
            With ``None`` (the default) docopt parses ``sys.argv[1:]``.

    Three typed readers (training, testing, anonymous data) and eight
    writers (predictions, threshold scores, testing output, precision/recall
    and ROC points) are created and handed to ``run`` together with the
    ``--verbose`` flag.  Every file opened here is closed when ``run``
    returns or raises.
    """
    # Function-scope import: keeps this entry point self-contained.
    import contextlib

    # ``argv`` used to be accepted but ignored, so callers could not supply
    # their own argument list.
    args = docopt.docopt(__doc__, argv=argv)
    logging.basicConfig(
        level=logging.DEBUG if args['--debug'] else logging.INFO,
        format='%(asctime)s %(levelname)s:%(name)s -- %(message)s')

    # Column group shared by every prediction / score output (17 columns).
    base_columns = [
        'username', 'session_start', 'mean_in_seconds', 'std_in_seconds',
        'namespace_0_edits', 'namespace_1_edits', 'namespace_2_edits',
        'namespace_3_edits', 'namespace_4_edits', 'namespace_5_edits',
        'namespace_120_edits', 'namespace_121_edits', 'edits',
        'session_length_in_seconds', 'inter_edits_less_than_5_seconds',
        'inter_edits_between_5_and_20_seconds',
        'inter_edits_greater_than_20_seconds'
    ]
    # Extra columns that only the "i2" outputs carry (14 columns).
    # NOTE: 'disinct_edit_kinds' is kept exactly as spelled; it is an
    # emitted column name that other tools may read.
    i2_columns = [
        'claims', 'distinct_claims', 'distinct_pages', 'disinct_edit_kinds',
        'generic_bot_comment', 'bot_revision_comment', 'sitelink_changes',
        'alias_changed', 'label_changed', 'description_changed', 'edit_war',
        'inter_edits_less_than_2_seconds', 'things_removed',
        'things_modified'
    ]

    with contextlib.ExitStack() as stack:

        def open_reader(option, types):
            # Every input has a header row; ``types`` gives one converter
            # per column.
            handle = stack.enter_context(open(args[option], 'rt'))
            return mysqltsv.Reader(handle, headers=True, types=types)

        def open_writer(option, headers):
            handle = stack.enter_context(open(args[option], "w"))
            return mysqltsv.Writer(handle, headers=headers)

        # Training and testing inputs share one 34-column layout; a fresh
        # list is built for each reader.
        input_training_file = open_reader(
            '<input_training>',
            [str, str, str, float, float] + [int] * 9
            + [str, str, float] + [int] * 17)
        input_testing_file = open_reader(
            '<input_testing>',
            [str, str, str, float, float] + [int] * 9
            + [str, str, float] + [int] * 17)
        # The anonymous data has 31 columns.
        input_anonymous_data_file = open_reader(
            '<input_anonymous_data>',
            [str, str, float, float] + [int] * 9 + [float] + [int] * 17)

        r_forest_predictions_output_file = open_writer(
            '<r_forest_predictions_output>',
            base_columns + ['bot_prediction'])
        gradient_b_predictions_output_file = open_writer(
            '<gradient_b_predictions_output>',
            base_columns + ['bot_prediction'])
        gradient_b_predictions_i2_output_file = open_writer(
            '<gradient_b_predictions_i2_output>',
            base_columns + i2_columns + ['bot_prediction'])
        gradient_b_threshold_scores_output_file = open_writer(
            '<gradient_b_threshold_scores_output>',
            base_columns + ['threshold_score'])
        gradient_b_threshold_scores_i2_output_file = open_writer(
            '<gradient_b_threshold_scores_i2_output>',
            base_columns + i2_columns + ['threshold_score'])
        testing_output_file = open_writer(
            '<testing_output>',
            ['user'] + base_columns + ['bot', 'human', 'bot_prediction'])
        pr_output_file = open_writer(
            '<pr_output>', ['precision', 'recall'])
        roc_output_file = open_writer(
            '<roc_output>', ['false_positives', 'true_positives'])

        verbose = args['--verbose']

        run(input_training_file, input_testing_file,
            input_anonymous_data_file, r_forest_predictions_output_file,
            gradient_b_predictions_output_file,
            gradient_b_predictions_i2_output_file,
            gradient_b_threshold_scores_output_file,
            gradient_b_threshold_scores_i2_output_file,
            testing_output_file, pr_output_file, roc_output_file, verbose)
def main(argv=None):
    """Parse the command line, open the sample files and delegate to ``run``.

    Args:
        argv: Optional list of command-line arguments passed on to docopt.
            With ``None`` (the default) docopt parses ``sys.argv[1:]``.

    Three typed readers (testing data and two anonymous-user threshold score
    files) and three writers sharing one header layout are created and
    handed to ``run`` together with the ``--verbose`` flag.  Every file
    opened here is closed when ``run`` returns or raises.
    """
    # Function-scope import: keeps this entry point self-contained.
    import contextlib

    # ``argv`` used to be accepted but ignored, so callers could not supply
    # their own argument list.
    args = docopt.docopt(__doc__, argv=argv)
    logging.basicConfig(
        level=logging.DEBUG if args['--debug'] else logging.INFO,
        format='%(asctime)s %(levelname)s:%(name)s -- %(message)s'
    )

    # All three sample outputs use exactly the same column headers.
    sample_headers = [
        'session start timestamp',
        'session completed timestamp',
        'url',
        'Consistent revision frequency',
        'Comment is "Updated item"',
        'Similar operations occur to different pages',
        'More than one claim edited per revision',
        'At least one rev. comment is prefixed by "bot" or "robot"',
        'Short session with rapid revisions',
        'Not-obviously a bot',
    ]

    with contextlib.ExitStack() as stack:

        def open_reader(option, types):
            # Every input has a header row; ``types`` gives one converter
            # per column.
            handle = stack.enter_context(open(args[option], 'rt'))
            return mysqltsv.Reader(handle, headers=True, types=types)

        def open_sample_writer(option):
            handle = stack.enter_context(open(args[option], "w"))
            # Each writer gets its own copy of the header list.
            return mysqltsv.Writer(handle, headers=list(sample_headers))

        # 21 columns.
        input_testing_file = open_reader(
            '<input_testing>',
            [str, str, str, float, float] + [int] * 9
            + [float, int, int, int, str, str, int])
        # 18 columns, the last one a float score.
        input_anonymous_user_threshold_scores_file = open_reader(
            '<input_anonymous_user_threshold_scores>',
            [str, str, float, float] + [int] * 9
            + [float, int, int, int, float])
        # 32 columns, the last one a float score.
        input_anonymous_user_threshold_scores_i2_file = open_reader(
            '<input_anonymous_user_threshold_scores_i2>',
            [str, str, float, float] + [int] * 9
            + [float] + [int] * 17 + [float])

        anonymous_user_samples_output_file = open_sample_writer(
            '<anonymous_user_samples_output>')
        anonymous_user_samples_i2_output_file = open_sample_writer(
            '<anonymous_user_samples_i2_output>')
        testing_samples_output_file = open_sample_writer(
            '<testing_samples_output>')

        verbose = args['--verbose']

        run(input_testing_file, input_anonymous_user_threshold_scores_file,
            input_anonymous_user_threshold_scores_i2_file,
            anonymous_user_samples_output_file,
            anonymous_user_samples_i2_output_file,
            testing_samples_output_file, verbose)
import io
import sys

import mysqltsv

# Writing: emit three rows to stdout under a fixed header.  Rows are given
# both positionally (lists) and by column name (dict); one row contains None.
writer = mysqltsv.Writer(sys.stdout, headers=['user_id', 'user_text', 'edits'])
for record in (
    [10, 'Foobar_Barman', 2344],
    {'user_text': 'Barfoo_Fooman', 'user_id': 11, 'edits': 20},
    [None, "127.0.0.1", 42],
):
    writer.write(record)

# Reading: parse an in-memory file with a header line and three data rows,
# converting the columns with int, str and int respectively.
my_file = io.StringIO("".join([
    "user_id\tuser_text\tedits\n",
    "10\tFoobar_Barman\t2344\n",
    "11\tBarfoo_Fooman\t20\n",
    "NULL\t127.0.0.1\t42\n",
]))

reader = mysqltsv.Reader(my_file, types=[int, str, int])
for row in reader:
    # The same row is accessed by attribute, by column name and by index.
    fields = (row.user_id, row['user_text'], row[2])
    print(*map(repr, fields))
import sys

import mysqltsv

# Read rows from standard input and print each row's values.
revs = mysqltsv.Reader(sys.stdin)
for rev in revs:
    values = rev.values()
    print(values)