def breakdown(self):
    """Run the ``breakdown`` subcommand: visualize a breakdown of messages.

    Builds the argument parser for this subcommand, parses everything after
    the subcommand name (``sys.argv[2:]``) and hands the parsed namespace to
    ``visualizers.breakdown.main``.
    """
    # Imported lazily so the visualizer is only loaded when this subcommand runs.
    from visualizers.breakdown import main

    parser = ArgParseDefault(description='Visualize breakdown of messages')
    parser = add_load_data_args(parser)
    parser.add_argument(
        '--as-density',
        dest='as_density',
        action='store_true',
        help='Plots individual densities instead of stacked histograms')
    parser.add_argument(
        '-n', '--top-n',
        dest='top_n',
        type=int,
        default=10,
        help='Only consider the top n conversation partners (by number of messages)')
    parser.add_argument(
        '-b', '--bin-size',
        dest='bin_size',
        type=str,
        default='1M',
        # The original text listed "hour" with a missing opening quote; fixed here.
        help=('Bin sizes (use the pandas Timedelta abbreviations of the form '
              '<number><type> where type can be '
              '‘Y’, ‘M’, ‘W’, ‘D’, ‘days’, ‘day’, ‘hours’, ‘hour’, ‘hr’, ‘h’, '
              '‘m’, ‘minute’, ‘min’, ‘minutes’, ‘T’, ‘S’, ‘seconds’, ‘sec’, '
              '‘second’, ‘ms’, ‘milliseconds’, ‘millisecond’, ‘milli’, ‘millis’, '
              '‘L’, ‘us’, ‘microseconds’, ‘microsecond’, ‘micro’, ‘micros’, ‘U’, '
              '‘ns’, ‘nanoseconds’, ‘nano’, ‘nanos’, ‘nanosecond’, ‘N’).'))
    # sys.argv[1] is the subcommand name itself; only the remainder belongs here.
    args = parser.parse_args(sys.argv[2:])
    main(args)
def main():
    """Export parsed message logs to stdout or to a file.

    Command-line options (in addition to those added by ``add_load_data_args``):

    * ``-n/--num-rows``: number of rows to print; a positive value prints the
      first n rows, a negative value prints the last ``abs(n)`` rows
      (stdout only).
    * ``-c/--cols``: columns to print (stdout only).
    * ``-f/--format``: ``stdout``, ``json``, ``csv`` or ``pkl``.

    For the file formats the data is written to
    ``exports/chatistics_export_<timestamp>.<format>``; the ``exports``
    directory is created if it does not exist.

    Raises:
        Exception: if the format is not one of the supported ones (normally
            prevented by the parser's ``choices``).
    """

    def get_f_name():
        """Return a timestamped export path whose extension is the chosen format."""
        ts = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
        return os.path.join('exports', f'chatistics_export_{ts}.{args.format}')

    parser = ArgParseDefault(description='Export parsed chatlog data')
    parser = add_load_data_args(parser)
    parser.add_argument(
        '-n', '--num-rows',
        dest='num_rows',
        type=int,
        default=50,
        help='Print first n rows (use negative for last rows) (only used if output format is stdout)')
    parser.add_argument(
        '-c', '--cols',
        dest='cols',
        nargs='+',
        default=[
            'timestamp', 'conversationWithName', 'senderName', 'outgoing',
            'text', 'language', 'platform'
        ],
        help='Only show specific columns (only used if output format is stdout)')
    parser.add_argument(
        '-f', '--format',
        dest='format',
        default='stdout',
        choices=['stdout', 'json', 'csv', 'pkl'],
        help='Output format')
    args = parser.parse_args()
    df = load_data(args)

    if args.format == 'stdout':
        # Print data to stdout.
        # A negative count selects the LAST rows, as the help text promises;
        # ``iloc[:-n]`` would instead drop the last n rows.
        if args.num_rows < 0:
            df = df.iloc[args.num_rows:]
        else:
            df = df.iloc[:args.num_rows]
        # ``assign`` returns a new frame, so we never write into a slice of
        # the loaded data (avoids pandas' SettingWithCopy pitfalls).
        df = df.assign(timestamp=pd.to_datetime(df['timestamp'], unit='s'))
        pd.set_option('display.max_colwidth', 100)
        with pd.option_context('display.max_rows', 1000, 'display.width', -1):
            print(df[args.cols].to_string(index=False))
        return

    # Exporting data to a file
    f_name = get_f_name()
    # Make sure the target directory exists before any writer opens the file.
    os.makedirs(os.path.dirname(f_name), exist_ok=True)
    log.info(f'Exporting data to file {f_name}')
    if args.format == 'json':
        df.to_json(f_name, orient='records')
    elif args.format == 'csv':
        df.to_csv(f_name, index=False)
    elif args.format == 'pkl':
        with open(f_name, 'wb') as f:
            pickle.dump(df, f)
    else:
        raise Exception(f'Format {args.format} is not supported.')
def hangouts(self):
    """Run the ``hangouts`` subcommand: parse Google Hangouts message logs.

    Parses the arguments following the subcommand name and forwards the own
    name, the chat log path and the message limit to
    ``parsers.hangouts.main``.
    """
    from parsers.hangouts import main as run_parser

    cli = add_common_parse_arguments(
        ArgParseDefault(description='Parse message logs from Google Hangouts'))
    cli.add_argument(
        '-f', '--file-path',
        dest='file_path',
        default=config['hangouts']['DEFAULT_RAW_LOCATION'],
        help='Path to Hangouts chat log file (json file)',
    )
    # Skip the program name and the subcommand itself.
    opts = cli.parse_args(sys.argv[2:])
    run_parser(opts.own_name, opts.file_path, opts.max)
def messenger(self):
    """Run the ``messenger`` subcommand: parse Facebook Messenger message logs.

    Parses the arguments following the subcommand name and forwards the own
    name, the chat log folder and the message limit to
    ``parsers.messenger.main``.
    """
    from parsers.messenger import main as run_parser

    cli = add_common_parse_arguments(
        ArgParseDefault(description='Parse message logs from Facebook Messenger'))
    cli.add_argument(
        '-f', '--file-path',
        dest='file_path',
        default=config['messenger']['DEFAULT_RAW_LOCATION'],
        help='Path to Facebook messenger chat log folder',
    )
    # Skip the program name and the subcommand itself.
    opts = cli.parse_args(sys.argv[2:])
    run_parser(opts.own_name, opts.file_path, opts.max)
def telegram(self):
    """Run the ``telegram`` subcommand: parse Telegram message logs.

    Parses the arguments following the subcommand name and calls
    ``parsers.telegram.main`` with the own name, the overall message limit
    and the per-dialog message limit.
    """
    from parsers.telegram import main as run_parser

    cli = add_common_parse_arguments(
        ArgParseDefault(description='Parse message logs from Telegram'))
    cli.add_argument(
        '--max-dialog',
        dest='max_dialog',
        type=int,
        default=config['telegram']['USER_DIALOG_MESSAGES_LIMIT'],
        help='Maximum number of messages to export per dialog',
    )
    # Skip the program name and the subcommand itself.
    opts = cli.parse_args(sys.argv[2:])
    run_parser(
        opts.own_name,
        max_exported_messages=opts.max,
        user_dialog_messages_limit=opts.max_dialog,
    )
def __init__(self):
    """Configure logging and dispatch to the subcommand named on the command line.

    Only ``sys.argv[1:2]`` is parsed here; the selected subcommand method is
    then called with no arguments. The subcommand methods visible in this
    file parse ``sys.argv[2:]`` themselves.

    A command is accepted only if it names a public, callable attribute of
    this object. Anything else (unknown names, names starting with an
    underscore such as ``__init__`` or ``__doc__``, non-callable attributes)
    prints the help text and exits with status 1.
    """
    logging.config.fileConfig('logging.conf')
    parser = ArgParseDefault(description='', usage=USAGE_DESC)
    parser.add_argument('command', help='Subcommand to run')
    args = parser.parse_args(sys.argv[1:2])

    # Never dispatch to private/dunder attributes: ``__init__`` would recurse
    # endlessly and e.g. ``__doc__`` is not callable.
    handler = None
    if not args.command.startswith('_'):
        handler = getattr(self, args.command, None)

    if not callable(handler):
        print('Unrecognized command')
        parser.print_help()
        sys.exit(1)
    handler()
def whatsapp(self):
    """Run the ``whatsapp`` subcommand: parse Whatsapp message logs.

    Parses the arguments following the subcommand name and forwards the own
    name, the chat log path, the message limit and the ``infer_datetime``
    flag to ``parsers.whatsapp.main``.

    ``-i/--infer-datetime`` takes an optional boolean value; passing the flag
    without a value means ``True``.
    """
    # Imported lazily so the parser is only loaded when this subcommand runs.
    from parsers.whatsapp import main

    parser = ArgParseDefault(description='Parse message logs from Whatsapp')
    parser = add_common_parse_arguments(parser)
    parser.add_argument(
        '-f', '--file-path',
        dest='file_path',
        default=config['whatsapp']['DEFAULT_RAW_LOCATION'],
        # Previously a copy-paste of the Messenger help text.
        help='Path to Whatsapp chat logs')
    parser.add_argument(
        '-i', '--infer-datetime',
        dest='infer_datetime',
        type=str2bool,
        nargs='?',
        # With nargs='?' a bare "-i" yields ``const``; without it the value
        # would be None (falsy), silently disabling the feature.
        const=True,
        default=True,
        help='Infer datetime regex for each chat if true')
    # sys.argv[1] is the subcommand name itself; only the remainder belongs here.
    args = parser.parse_args(sys.argv[2:])
    main(args.own_name, args.file_path, args.max, args.infer_datetime)
def cloud(self):
    """Run the ``cloud`` subcommand: visualize word clouds.

    Builds the argument parser for this subcommand, parses the arguments
    following the subcommand name and passes the parsed namespace to
    ``visualizers.cloud.main``.
    """
    from visualizers.cloud import main as render_cloud

    cli = add_load_data_args(ArgParseDefault(description='Visualize word clouds'))
    cli.add_argument(
        '-m', '--mask-image',
        dest='mask_image',
        type=str,
        default=None,
        help='Image to use as mask',
        required=True,
    )
    cli.add_argument(
        '--sw', '--stopword-paths',
        dest='stopword_paths',
        nargs='+',
        default=['stopwords/en.json'],
        help='Path to stopword files (JSON format)',
    )
    cli.add_argument(
        '-n', '--num-words',
        dest='num_words',
        type=int,
        default=10000,
        help='Print up to n words into the cloud',
    )
    cli.add_argument(
        '--density', '--dpi',
        dest='dpi',
        type=int,
        default=300,
        help='Rendered image DPI',
    )
    # Skip the program name and the subcommand itself.
    opts = cli.parse_args(sys.argv[2:])
    render_cloud(opts)
def parse_args():
    """Parse the command-line arguments and return the resulting namespace.

    Registers the index name (required), doc type, output format, output
    filename and Elasticsearch cluster options, then lets
    ``add_agg_query_args`` add its own options before parsing.
    """
    cli = ArgParseDefault()
    cli.add_argument(
        '-i', '--index',
        type=str,
        required=True,
        help='Name of index',
    )
    cli.add_argument(
        '-d', '--doc-type',
        dest='doc_type',
        type=str,
        required=False,
        default='tweet',
        help='Doc type',
    )
    cli.add_argument(
        '-o', '--out',
        type=str,
        required=False,
        choices=['csv'],
        default='csv',
        help='Output format',
    )
    cli.add_argument(
        '--output-file',
        dest='output_file',
        type=str,
        required=False,
        default=None,
        help='Output filename',
    )
    cli.add_argument(
        '--es',
        type=str,
        required=False,
        choices=['dev', 'stg', 'prd'],
        default='prd',
        help='Elasticsearch cluster',
    )
    cli = add_agg_query_args(cli)
    return cli.parse_args()