Exemple #1
0
 def breakdown(self):
     """Run the ``breakdown`` subcommand.

     Builds the option parser (shared data-loading options plus the
     density, top-n and bin-size options), parses everything after the
     subcommand name in ``sys.argv`` and passes the resulting namespace to
     ``visualizers.breakdown.main``.
     """
     from visualizers.breakdown import main as run_breakdown

     cli = add_load_data_args(
         ArgParseDefault(description='Visualize breakdown of messages'))

     density_help = 'Plots individual densities instead of stacked histograms'
     top_n_help = 'Only consider the top n conversation partners (by number of messages)'
     bin_size_help = 'Bin sizes (use the pandas Timedelta abbreviations of the form <number><type> where type can be ‘Y’, ‘M’, ‘W’, ‘D’, ‘days’, ‘day’, ‘hours’, hour’, ‘hr’, ‘h’, ‘m’, ‘minute’, ‘min’, ‘minutes’, ‘T’, ‘S’, ‘seconds’, ‘sec’, ‘second’, ‘ms’, ‘milliseconds’, ‘millisecond’, ‘milli’, ‘millis’, ‘L’, ‘us’, ‘microseconds’, ‘microsecond’, ‘micro’, ‘micros’, ‘U’, ‘ns’, ‘nanoseconds’, ‘nano’, ‘nanos’, ‘nanosecond’, ‘N’).'

     cli.add_argument('--as-density',
                      action='store_true',
                      dest='as_density',
                      help=density_help)
     cli.add_argument('-n', '--top-n',
                      type=int,
                      default=10,
                      dest='top_n',
                      help=top_n_help)
     cli.add_argument('-b', '--bin-size',
                      type=str,
                      default='1M',
                      dest='bin_size',
                      help=bin_size_help)

     # argv[0] is the script and argv[1] the subcommand; the rest are options.
     run_breakdown(cli.parse_args(sys.argv[2:]))
Exemple #2
0
def main():
    """Export parsed chat logs either to stdout or to a file under ``exports/``.

    Command-line options (in addition to the shared data-loading options):

    * ``-n/--num-rows``: number of rows to print when the format is
      ``stdout``; a non-negative value prints the first n rows, a negative
      value prints the last ``abs(n)`` rows.
    * ``-c/--cols``: columns to print when the format is ``stdout``.
    * ``-f/--format``: one of ``stdout``, ``json``, ``csv`` or ``pkl``.

    For file formats the full data frame is written to
    ``exports/chatistics_export_<timestamp>.<format>``.

    Raises:
        Exception: if the format is not one of the supported ones (guarded
            against by the parser's ``choices``, kept as a safety net).
    """
    parser = ArgParseDefault(description='Export parsed chatlog data')
    parser = add_load_data_args(parser)
    parser.add_argument(
        '-n',
        '--num-rows',
        dest='num_rows',
        type=int,
        default=50,
        help=
        'Print first n rows (use negative values for last rows) (only used if output format is stdout)'
    )
    parser.add_argument(
        '-c',
        '--cols',
        dest='cols',
        nargs='+',
        default=[
            'timestamp', 'conversationWithName', 'senderName', 'outgoing',
            'text', 'language', 'platform'
        ],
        help='Only show specific columns (only used if output format is stdout)'
    )
    parser.add_argument('-f',
                        '--format',
                        dest='format',
                        default='stdout',
                        choices=['stdout', 'json', 'csv', 'pkl'],
                        help='Output format')
    args = parser.parse_args()
    df = load_data(args)

    if args.format == 'stdout':
        # A plain ``iloc[:n]`` with negative n would drop the last rows
        # instead of selecting them, contradicting the documented option.
        if args.num_rows >= 0:
            df = df.head(args.num_rows)
        else:
            df = df.tail(-args.num_rows)
        # ``assign`` builds a new frame, so the conversion never writes into
        # a slice of the loaded data (avoids SettingWithCopy issues).
        df = df.assign(timestamp=pd.to_datetime(df['timestamp'], unit='s'))
        pd.set_option('display.max_colwidth', 100)
        with pd.option_context('display.max_rows', 1000, 'display.width', -1):
            print(df[args.cols].to_string(index=False))
        return

    # Exporting data to a file; make sure the target directory exists first.
    os.makedirs('exports', exist_ok=True)
    ts = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
    f_name = os.path.join('exports', f'chatistics_export_{ts}.{args.format}')
    log.info(f'Exporting data to file {f_name}')
    if args.format == 'json':
        df.to_json(f_name, orient='records')
    elif args.format == 'csv':
        df.to_csv(f_name, index=False)
    elif args.format == 'pkl':
        with open(f_name, 'wb') as f:
            pickle.dump(df, f)
    else:
        raise Exception(f'Format {args.format} is not supported.')
Exemple #3
0
 def hangouts(self):
     """Run the ``hangouts`` subcommand.

     Parses the options following the subcommand name and calls
     ``parsers.hangouts.main`` with the own name, the chat log path and the
     ``max`` option value.
     """
     from parsers.hangouts import main as parse_hangouts

     cli = ArgParseDefault(
         description='Parse message logs from Google Hangouts')
     cli = add_common_parse_arguments(cli)

     fallback_path = config['hangouts']['DEFAULT_RAW_LOCATION']
     cli.add_argument('-f', '--file-path',
                      default=fallback_path,
                      dest='file_path',
                      help='Path to Hangouts chat log file (json file)')

     # argv[0] is the script and argv[1] the subcommand; the rest are options.
     opts = cli.parse_args(sys.argv[2:])
     parse_hangouts(opts.own_name, opts.file_path, opts.max)
Exemple #4
0
 def messenger(self):
     """Run the ``messenger`` subcommand.

     Parses the options following the subcommand name and calls
     ``parsers.messenger.main`` with the own name, the chat log path and
     the ``max`` option value.
     """
     from parsers.messenger import main as parse_messenger

     cli = ArgParseDefault(
         description='Parse message logs from Facebook Messenger')
     cli = add_common_parse_arguments(cli)

     fallback_path = config['messenger']['DEFAULT_RAW_LOCATION']
     cli.add_argument('-f', '--file-path',
                      default=fallback_path,
                      dest='file_path',
                      help='Path to Facebook messenger chat log folder')

     # argv[0] is the script and argv[1] the subcommand; the rest are options.
     opts = cli.parse_args(sys.argv[2:])
     parse_messenger(opts.own_name, opts.file_path, opts.max)
Exemple #5
0
 def telegram(self):
     """Run the ``telegram`` subcommand.

     Parses the options following the subcommand name and calls
     ``parsers.telegram.main`` with the own name, the ``max`` option value
     and the per-dialog message limit.
     """
     from parsers.telegram import main as parse_telegram

     cli = ArgParseDefault(description='Parse message logs from Telegram')
     cli = add_common_parse_arguments(cli)

     per_dialog_default = config['telegram']['USER_DIALOG_MESSAGES_LIMIT']
     cli.add_argument('--max-dialog',
                      type=int,
                      default=per_dialog_default,
                      dest='max_dialog',
                      help='Maximum number of messages to export per dialog')

     # argv[0] is the script and argv[1] the subcommand; the rest are options.
     opts = cli.parse_args(sys.argv[2:])
     parse_telegram(opts.own_name,
                    user_dialog_messages_limit=opts.max_dialog,
                    max_exported_messages=opts.max)
Exemple #6
0
 def __init__(self):
     """Configure logging and dispatch to the requested subcommand.

     Only the first command-line argument is parsed here; it names the
     method of this object to run. The chosen method is then called with
     no arguments. If the name does not refer to a public callable
     attribute, the help text is printed and the process exits with
     status 1.
     """
     logging.config.fileConfig('logging.conf')
     parser = ArgParseDefault(description='', usage=USAGE_DESC)
     parser.add_argument('command', help='Subcommand to run')
     # Restrict parsing to argv[1] so subcommand options are left untouched.
     args = parser.parse_args(sys.argv[1:2])
     # Names starting with an underscore are never subcommands: dispatching
     # to e.g. ``__init__`` would recurse forever, and other dunder
     # attributes are not meant to be invoked from the command line.
     handler = None
     if not args.command.startswith('_'):
         handler = getattr(self, args.command, None)
     if not callable(handler):
         print('Unrecognized command')
         parser.print_help()
         sys.exit(1)
     handler()
Exemple #7
0
 def whatsapp(self):
     """Run the ``whatsapp`` subcommand.

     Parses the options following the subcommand name and calls
     ``parsers.whatsapp.main`` with the own name, the chat log path, the
     ``max`` option value and the datetime-inference flag.

     ``-i/--infer-datetime`` defaults to ``True``; it accepts an explicit
     value (converted with ``str2bool``) and, when given without a value,
     evaluates to ``True``.
     """
     from parsers.whatsapp import main
     parser = ArgParseDefault(
         description='Parse message logs from Whatsapp')
     parser = add_common_parse_arguments(parser)
     parser.add_argument('-f',
                         '--file-path',
                         dest='file_path',
                         default=config['whatsapp']['DEFAULT_RAW_LOCATION'],
                         help='Path to Whatsapp chat log folder')
     # With nargs='?' and no ``const``, a bare ``-i`` would store None
     # (falsy) and silently disable inference; ``const=True`` makes the
     # bare flag mean "enabled".
     parser.add_argument('-i',
                         '--infer-datetime',
                         dest='infer_datetime',
                         type=str2bool,
                         nargs='?',
                         const=True,
                         default=True,
                         help='Infer datetime regex for each chat if true')
     # argv[0] is the script and argv[1] the subcommand; the rest are options.
     args = parser.parse_args(sys.argv[2:])
     main(args.own_name, args.file_path, args.max, args.infer_datetime)
Exemple #8
0
 def cloud(self):
     """Run the ``cloud`` subcommand.

     Builds the option parser (shared data-loading options plus mask image,
     stopword files, word count and DPI), parses everything after the
     subcommand name in ``sys.argv`` and passes the resulting namespace to
     ``visualizers.cloud.main``.
     """
     from visualizers.cloud import main as render_cloud

     cli = add_load_data_args(
         ArgParseDefault(description='Visualize word clouds'))

     # The mask image is mandatory; the remaining options have defaults.
     cli.add_argument('-m', '--mask-image',
                      required=True,
                      type=str,
                      default=None,
                      dest='mask_image',
                      help='Image to use as mask')
     cli.add_argument('--sw', '--stopword-paths',
                      nargs='+',
                      default=['stopwords/en.json'],
                      dest='stopword_paths',
                      help='Path to stopword files (JSON format)')
     cli.add_argument('-n', '--num-words',
                      type=int,
                      default=10000,
                      dest='num_words',
                      help='Print up to n words into the cloud')
     cli.add_argument('--density', '--dpi',
                      type=int,
                      default=300,
                      dest='dpi',
                      help='Rendered image DPI')

     # argv[0] is the script and argv[1] the subcommand; the rest are options.
     render_cloud(cli.parse_args(sys.argv[2:]))
def parse_args():
    """Parse the command line and return the resulting namespace.

    Registers the index, doc-type, output-format, output-file and
    Elasticsearch-cluster options, then lets ``add_agg_query_args`` add its
    own options before parsing.
    """
    # (flags, keyword settings) for each option, in registration order.
    option_table = (
        (('-i', '--index'),
         dict(required=True, type=str, help='Name of index')),
        (('-d', '--doc-type'),
         dict(dest='doc_type', default='tweet', required=False, type=str,
              help='Doc type')),
        (('-o', '--out'),
         dict(choices=['csv'], required=False, default='csv', type=str,
              help='Output format')),
        (('--output-file',),
         dict(dest='output_file', required=False, default=None, type=str,
              help='Output filename')),
        (('--es',),
         dict(choices=['dev', 'stg', 'prd'], required=False, default='prd',
              type=str, help='Elasticsearch cluster')),
    )

    cli = ArgParseDefault()
    for flags, settings in option_table:
        cli.add_argument(*flags, **settings)
    cli = add_agg_query_args(cli)
    return cli.parse_args()