Example #1
def download_with_progress(url, out, user=None, password=None):
    logger.info(f"Downloading {url} as {out}")

    create_output_dir(out)

    auth = None
    if user is not None or password is not None:
        auth = (user, password)

    with open(out, 'wb') as f:
        response = requests.get(url, auth=auth, stream=True)
        response.raise_for_status()  # fail early rather than saving an error page as the file
        total_length = response.headers.get('content-length')

        if total_length is None:  # no content length header
            f.write(response.content)
        else:
            dl = 0
            total_length = int(total_length)
            for data in response.iter_content(chunk_size=4096):
                dl += len(data)
                f.write(data)
                done = int(50 * dl / total_length)
                sys.stdout.write("\r[%s%s]" % ('=' * done, ' ' * (50 - done)))
                sys.stdout.flush()
    sys.stdout.write("\n")
    sys.stdout.flush()

    if out.endswith('.tar'):
        extract_tar(out)

    logger.info(f"Done downloading {url} as {out}")
Example #2
def _main(input_path, output_path):
    create_output_dir(output_path)

    for image_path in list_directory(input_path):
        tweet_id = get_tweet_id_from_image_path(image_path)

        sentiment_output_path = get_sentiment_output_path(
            output_path, tweet_id)

        if os.path.exists(sentiment_output_path):
            continue

        logger.debug(f"Loading image {image_path}")
        with open(image_path, 'rb') as image_file:
            image = image_file.read()

        logger.debug(f"Requesting sentiment for {image_path}")
        extension = os.path.splitext(image_path)[1]
        response = request_image_sentiment(image, extension)
        if not response:
            logger.debug(f"Invalid sentiment for {image_path}")
            continue

        sentiment = json.loads(response.content)

        logger.debug(
            f"Writing sentiment {sentiment} to {sentiment_output_path}")
        write_sentiment(sentiment_output_path, sentiment)
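`request_image_sentiment` is not shown in the excerpt. A minimal sketch of what it might look like, assuming an HTTP sentiment service that accepts image uploads (the endpoint URL and field names are placeholders, not confirmed by the source):

import requests

SENTIMENT_ENDPOINT = "https://example.com/api/sentiment"  # placeholder

def request_image_sentiment(image, extension):
    # Post the raw bytes as a file upload; return None on a non-2xx status
    # so the caller's `if not response:` check skips the image.
    files = {"image": (f"image{extension}", image)}
    response = requests.post(SENTIMENT_ENDPOINT, files=files)
    return response if response.ok else None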
Example #3
def _main(input_path, output_path, period):
    logger.info(
        f"Loading Twitter dataset {input_path} and writing to {output_path}")
    logger.info(f"Period: {period}")

    create_output_dir(output_path)

    df = load_twitter_dataset(input_path)

    pipeline = [
        wrapped_partial(select_columns, columns=SELECTED_COLUMNS),
        normalize_columns,
        generate_overall_image_sentiment,
        normalize_columns,
        generate_fused_sentiment,
        normalize_columns,
        generate_overall_categories,
        wrapped_partial(aggregate_overall_sentiment, period=period),
        drop_na,
    ]

    for stage in pipeline:
        logger.debug(f"Running stage {stage.__name__}")
        df = stage(df)

    logger.debug(f"Dataframe sample:\n{df}")

    logger.info(f"Writing Twitter dataset to {output_path}")
    df.to_json(output_path)
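The pipeline loop logs `stage.__name__`, which a plain `functools.partial` object does not have. `wrapped_partial` is presumably the standard recipe that copies the wrapped function's metadata onto the partial, along these lines:

import functools

def wrapped_partial(func, *args, **kwargs):
    # partial objects lack __name__; update_wrapper copies it (plus __doc__
    # and friends) from the underlying function so the logging above works.
    partial_func = functools.partial(func, *args, **kwargs)
    functools.update_wrapper(partial_func, func)
    return partial_func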
Example #4
def _main(output_path):
    create_output_dir(output_path)

    with stream_tweets() as stream:
        for tweet_json in stream.iter_lines(decode_unicode=True):
            try:
                tweet = json.loads(tweet_json)
            except Exception as e:
                logger.error(f"Error parsing tweet {tweet_json}: {e}")
                continue  # otherwise 'tweet' is undefined (or stale) below

            if tweet_qualifies_for_use(tweet):
                save_tweet(tweet, output_path)
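`stream_tweets` must yield an object exposing `iter_lines`, which matches a streaming `requests` response. A hypothetical sketch (the endpoint is a placeholder, not the real Twitter streaming call):

import contextlib
import requests

@contextlib.contextmanager
def stream_tweets():
    # A long-lived streaming HTTP response; iter_lines() then yields one
    # JSON document per line, as the caller expects.
    response = requests.get("https://example.com/tweet-stream", stream=True)
    try:
        yield response
    finally:
        response.close()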
Example #5
def _main(input_path, output_path):
    create_output_dir(output_path)

    tweet_image_paths_to_download = []

    for tweet in list_tweets(input_path):
        tweet_image_url = get_tweet_image_url(tweet)
        tweet_image_path = get_tweet_image_path(tweet, output_path)
        if not os.path.exists(tweet_image_path):
            tweet_image_paths_to_download.append(
                (tweet_image_url, tweet_image_path))

    # Downloads are I/O-bound, so the pool is oversubscribed relative to cores.
    cpu_count = multiprocessing.cpu_count()
    with multiprocessing.Pool(cpu_count * 4) as pool:
        pool.map(download_tweet_image, tweet_image_paths_to_download)
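Because the work is network I/O rather than computation, a thread pool would give the same `map` interface without process start-up and pickling costs; a sketch assuming `download_tweet_image` accepts a `(url, path)` tuple:

from multiprocessing.pool import ThreadPool

with ThreadPool(processes=multiprocessing.cpu_count() * 4) as pool:
    pool.map(download_tweet_image, tweet_image_paths_to_download)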
Example #6
def _main(input_path, output_path, tickers):
    logger.info(f"Fetching stocks {tickers}")
    logger.info(f"Using time input range from {input_path}")

    create_output_dir(output_path)

    start_at, end_at = get_tweets_timespan(input_path)
    logger.debug(f"Stocks start time: {start_at}, end time: {end_at}")
    logger.debug(f"NOTE: This may be smaller than you expect due to weekends")

    stocks = [fetch_stock_history(ticker, start_at, end_at)
              for ticker in tickers]

    for ticker, stocks_df in zip(tickers, stocks):
        num_rows = len(stocks_df)
        logger.debug(f"Writing history for {ticker}: {num_rows} rows")
        write_stock_history(output_path, ticker, stocks_df)
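`fetch_stock_history` is not shown; one possible implementation, assuming the `yfinance` package (a guess, not confirmed by the excerpt):

import yfinance as yf

def fetch_stock_history(ticker, start_at, end_at):
    # Returns a DataFrame of daily OHLCV rows; weekends and market holidays
    # are absent, which is why the row count can be smaller than expected.
    return yf.download(ticker, start=start_at, end=end_at)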
Example #7
def _main(output_path, input_dirs):
    logger.info(
        f"Loading directories {input_dirs} and writing merge to {output_path}")

    create_output_dir(output_path)

    input_dirs_lists = get_input_dirs_lists(input_dirs)
    # Materialize once; slicing a lazy iterator for the log line below would
    # exhaust it before generate_merged_dicts consumes it.
    merged_list = list(merge_input_dir_lists(input_dirs_lists))

    logger.debug(f"Merging and writing {merged_list[:10]}")

    merged_dicts = generate_merged_dicts(input_dirs, merged_list)
    filtered_dicts = filter(None, merged_dicts)  # drop empty/falsy dicts
    flattened_dicts = [
        flatten(dict_, reducer='dot') for dict_ in filtered_dicts
    ]

    df = pd.DataFrame.from_records(flattened_dicts)
    df.to_json(output_path)

    logger.info(f"Wrote merged files to {output_path}")
Example #8
                adjustment_position=adjustment_position)

            calibration_dir = helpers.create_calibration_dir()
            helpers.check_existing_calibration(calibration_dir, my_loadcell)
            if my_loadcell.is_calibrated is not True:
                helpers.calibrate_loadcell(my_loadcell, calibration_dir)

            helpers.start_manual_mode(my_controller,
                                      my_loadcell,
                                      speed=3,
                                      mode_button_pin=22,
                                      up_button_pin=17,
                                      down_button_pin=27)

            test_parameters = helpers.read_test_parameters(test_type=result)
            output_dir = helpers.create_output_dir(test_parameters)
            helpers.save_test_parameters(my_controller, my_loadcell,
                                         test_parameters, output_dir)

            helpers.start_test(my_controller,
                               my_loadcell,
                               test_parameters,
                               output_dir=output_dir,
                               stop_button_pin=22)
    elif result == 'static':
        calibration_dir = helpers.create_calibration_dir()
        helpers.check_existing_calibration(calibration_dir, my_loadcell)
        if my_loadcell.is_calibrated is not True:
            helpers.calibrate_loadcell(my_loadcell, calibration_dir)

        helpers.start_manual_mode(my_controller,
Example #9
def _main(input_path, output_path):
    create_output_dir(output_path)
    files_to_analyze = get_files_to_analyze(input_path, output_path)
    analyze_tweets(files_to_analyze, output_path)
    logger.info(f"Entity Sentiment Analysis complete")