Example #1
def save_pandas_data(file_name, dat, old_data=None, verbose=VERBOSE):
    try:
        data = clean_pandas_data(dat)

        if old_data is not None:
            try:
                # Splice at the second-to-last date so the possibly incomplete
                # last week/month is re-fetched and overwritten with fresh data
                last_dt = old_data.index[-2]
                idx = data.index.get_loc(last_dt.strftime("%Y-%m-%d"))
                updated_data = pd.concat(
                    (old_data.iloc[:-2, :], data.iloc[idx:, :]), axis=0)
                updated_data.reset_index().to_csv(
                    file_name, index=False, compression="infer")  # Update
            except KeyError as err:
                LOG.error(f"Error updating the data: {err}")
        else:
            data.reset_index().to_csv(file_name,
                                      index=False,
                                      compression="infer")  # Save

        if verbose > 1:
            symbol = file_name.parent.name
            LOG.info(
                f"Saved {symbol} data:{get_tabs(symbol, prev=12)}[{file_name.stem}] OK"
            )
    except Exception as err:
        # format_tb returns the traceback as strings; print_tb prints to stderr
        # and would leave a literal 'None' in the log message
        LOG.error(
            f"ERROR saving data:\t\t{file_name.parent.name}/{file_name.stem} "
            f"{err!r} {''.join(traceback.format_tb(err.__traceback__))}")
Example #2
async def update_stock_info(info_file, info, create=True, verbose=VERBOSE):
    try:
        # Clean key names
        clean_info = clean_enumeration(info)
        clean_info.pop('matchScore', None)

        # Read previous info
        if info_file.exists():
            old_info = await read_info_file(info_file,
                                            check=False,
                                            verbose=verbose)
        else:
            old_info = {}

        await save_stock_info(info_file,
                              clean_info,
                              old_info=old_info,
                              create=create)
        if verbose > 1:
            symbol = info_file.parent.name
            LOG.info(f"Updating {symbol} info:{get_tabs(symbol, prev=15)}OK")
    except Exception as err:
        LOG.error(
            f"ERROR updating info: {info_file}. Msg: {err!r} "
            f"{''.join(traceback.format_tb(err.__traceback__))}")
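
Because update_stock_info is a coroutine, many symbols can be refreshed concurrently. A hedged sketch (paths and payloads are made up):

import asyncio
from pathlib import Path

async def update_all(infos):
    # infos: {symbol: info_dict}, e.g. from a symbol-search response
    await asyncio.gather(*(
        update_stock_info(Path(f"data/{sym}/info.json"), info)
        for sym, info in infos.items()))

asyncio.run(update_all({"ACME": {"name": "Acme Corp", "matchScore": "1.0"}}))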
Example #3
def get_optimizer(params: Iterable,
                  args: argparse.Namespace) -> optim.Optimizer:
    name = args.optim_type.lower()
    if name == 'sgd':
        LOG.info(
            f"SGD Optimizer <lr={args.lr}, momentum={args.momentum}, nesterov=True>"
        )
        return optim.SGD(params=params,
                         lr=args.lr,
                         momentum=args.momentum,
                         nesterov=True)
    elif name == 'adam':
        LOG.info(
            f"Adam Optimizer <lr={args.lr}, betas={args.betas}, eps={args.eps}>"
        )
        return optim.Adam(params=params,
                          lr=args.lr,
                          betas=args.betas,
                          eps=args.eps)
    elif name == 'adamw':
        LOG.info(
            f"AdamW Optimizer <lr={args.lr}, betas={args.betas}, eps={args.eps}, weight_decay={args.weight_decay}>"
        )
        return optim.AdamW(params=params,
                           lr=args.lr,
                           betas=args.betas,
                           eps=args.eps,
                           weight_decay=args.weight_decay)
    else:
        LOG.error(f"Unsupported optimizer: [bold red]{name}[/].")
        raise ValueError(f"Unsupported optimizer: {name}.")
Example #4
def _get_activation(name: str) -> nn.Module:
    if name.lower() == 'relu':
        return nn.ReLU()
    # TODO: test other activations
    else:
        LOG.error(f'Unrecognized activation function: [bold red]{name}[/].')
        raise ValueError(f'Unrecognized activation function: {name}.')
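
One plausible way to grow the TODO above is a name-to-module table. This is a sketch, not the project's code, and the extra activations are untested assumptions:

_ACTIVATIONS = {'relu': nn.ReLU, 'gelu': nn.GELU, 'tanh': nn.Tanh}

def _get_activation_from_table(name: str) -> nn.Module:
    try:
        return _ACTIVATIONS[name.lower()]()  # instantiate on lookup
    except KeyError:
        raise ValueError(f'Unrecognized activation function: {name}.')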
Example #5
def read_pandas_data(file_name):
    if not file_name.exists():
        LOG.error(f"ERROR: data not found for {file_name}")
        return None
    return pd.read_csv(file_name,
                       parse_dates=['date'],
                       index_col='date',
                       date_parser=dateparse)
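
dateparse is defined elsewhere in the project; given the "%Y-%m-%d" dates written by save_pandas_data, a typical definition would be (assumption):

from datetime import datetime

def dateparse(s):
    return datetime.strptime(s, "%Y-%m-%d")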
Example #6
def clean_pandas_data(dat):
    """Receives a dictionary of data, transform the dict into a pandas DataFrame and clean the column names"""
    try:
        data = pd.DataFrame.from_dict(dat, orient="index")
        # Apply clean names to columns and index
        column_names = clean_enumeration(data.columns.tolist())
        data.columns = column_names
        data.index.name = 'date'
        data.sort_index(axis=0, inplace=True, ascending=True)  # Sort by date
    except Exception as err:
        LOG.error(f"Error cleaning dataset: {err}")
        data = None
    return data
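
A sketch of the expected round trip (the raw keys mimic an Alpha Vantage-style response; the exact cleaned names depend on clean_enumeration):

raw = {
    "2024-01-12": {"1. open": "10.5", "4. close": "11.0"},
    "2024-01-05": {"1. open": "10.0", "4. close": "10.5"},
}
df = clean_pandas_data(raw)
# df is indexed by 'date' (sorted ascending), with columns renamed by clean_enumeration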
Example #7
def manage_vantage_errors(response, symbol):
    if "Error Message" in response.keys():
        LOG.error(
            f"ERROR: Not possible to retrieve {symbol}. Msg: {response['Error Message']}"
        )
    elif "Note" in response.keys():
        if response["Note"][:111] == 'Thank you for using Alpha Vantage! Our standard API call frequency ' \
                                     'is 5 calls per minute and 500 calls per day.':
            LOG.info(
                f"Retrieving {symbol}:{get_tabs(symbol, prev=12)}Max frequency reached! Waiting..."
            )
            return "longWait"
    return None
Example #8
async def read_info_file(info_file, check=True, verbose=VERBOSE):
    if not info_file:
        return {}
    if info_file.exists():
        async with aiofiles.open(info_file, "r") as info:
            data = await info.read()
            if verbose > 1:
                LOG.info(f"Info file read:{get_tabs('', prev=15)}{info_file}")
            return json.loads(data)
    else:
        if check:
            LOG.error(f"ERROR: No info found at {info_file}")
        if verbose > 1:
            LOG.warning(f"Info file: {info_file}\tDO NOT EXISTS!")
        return {}
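
A minimal synchronous driver for the coroutine (hypothetical path):

import asyncio
from pathlib import Path

info = asyncio.run(read_info_file(Path("data/ACME/info.json"), check=False))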
Example #9
async def query_data(symbol,
                     category=None,
                     api="vantage",
                     verbose=VERBOSE,
                     **kwargs):
    if category is None:
        raise ValueError("Please provide a valid category in the parameters")
    # Get semaphore
    semaphore_controller.get_semaphore(api)

    if verbose > 2:
        LOG.info("Successfully acquired the semaphore")

    if api == "vantage":
        url, params = alpha_vantage_query(symbol,
                                          category,
                                          key=KEYS_SET["alpha_vantage"],
                                          **kwargs)
        LOG.info(
            f"Retrieving {symbol}:{get_tabs(symbol, prev=12)}From '{api}' API")
    else:
        # url/params would be undefined below, so free the semaphore and bail out
        LOG.error(f"Unsupported API: {api}")
        semaphore_controller.release_semaphore(api)
        raise ValueError(f"Unsupported API: {api}")

    counter = 0
    while counter <= QUERY_RETRY_LIMIT:
        async with aiohttp.ClientSession() as session:
            async with session.get(url, params=params,
                                   headers=HEADERS) as resp:
                data = await resp.json()

        if api == "vantage":
            if manage_vantage_errors(data, symbol) == "longWait":
                counter += 1
                await asyncio.sleep(VANTAGE_WAIT)
            else:
                break

    await asyncio.sleep(MIN_SEM_WAIT)
    if verbose > 2:
        LOG.info("Releasing Semaphore")
    # Release semaphore
    semaphore_controller.release_semaphore(api)
    return data
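
A hedged driver for query_data; the category value is an assumption about what alpha_vantage_query accepts:

import asyncio

data = asyncio.run(query_data("IBM", category="TIME_SERIES_WEEKLY"))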
Example #10
def prepare_MTT_dataset(args):
    CONSOLE.rule("Pre-processing MTT Annotations and Data for Machine Learning")

    # --- get dirs ---
    # create out dir if not exists
    p_out = Path(args.p_out).absolute()
    while True:
        if p_out.exists():
            res = CONSOLE.input(f"Output folder exists ({p_out.as_posix()})! Do you want to remove it first? "
                                f"(You can also clean it manually now and hit the Enter key to retry) [y/n]: ")
            if res.lower() in ['y', 'yes']:
                # delete target folder
                shutil.rmtree(p_out)
                # create new one
                p_out.mkdir()
                LOG.info(f"Target folder removed, and new empty folder created.")
                break
            elif res.lower() in ['n', 'no']:
                msg = f"Output folder already exists and was not removed: {p_out.as_posix()}"
                LOG.error(msg)
                raise FileExistsError(msg)
            else:
                continue
        else:
            p_out.mkdir()
            LOG.info(f"Target folder ({p_out.as_posix()}) created.")
            break
    # train/val/test dirs
    p_out.joinpath('train').mkdir()
    p_out.joinpath('val').mkdir()
    p_out.joinpath('test').mkdir()
    # check raw data
    p_raw = Path(args.p_raw).absolute()
    assert len(list(p_raw.glob('[0-9a-f]'))) == 16, "MTT Raw data should have 16 directories from 0-9 and a-f."

    # --- parsing and processing annotations ---
    annotations = process_MTT_annotations(p_anno=args.p_anno,
                                          p_info=args.p_info,
                                          delimiter=args.delimiter,
                                          n_top=args.n_topk)
    # save processed annotations
    annotations.to_csv(Path(args.p_anno).parent.joinpath(f'annotations_top{args.n_topk}.csv').as_posix(), index=False)
    # save topk labels
    with open(Path(args.p_anno).parent.joinpath('labels.txt').as_posix(), 'w') as f:
        f.write(','.join(annotations.columns.tolist()[:args.n_topk]))

    CONSOLE.rule("Audio Preprocessing")
    LOG.info(f"MTT annotations processed. Now segmenting audios based on annotations for machine learning...")

    # --- process audio files based on annotations ---
    # Split the annotations evenly across the workers; the last worker also
    # takes the remainder rows
    avg = annotations.shape[0] // args.n_worker
    processes = []
    for i in range(args.n_worker):
        chunk = (annotations.iloc[i * avg:] if i == args.n_worker - 1
                 else annotations.iloc[i * avg:(i + 1) * avg])
        processes.append(Process(target=_process_audio_files,
                                 args=(i, chunk, p_out, p_raw,
                                       args.n_samples, args.sr, args.n_topk)))

    LOG.info(f"{args.n_worker} workers created.")

    # start jobs
    for p in processes:
        p.start()

    # wait jobs to finish
    for p in processes:
        p.join()

    CONSOLE.rule('MTT Dataset Preparation Done')
    return
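
Hypothetical CLI wiring for the arguments the function reads; the option names match the attributes used above, while the defaults are guesses:

import argparse

parser = argparse.ArgumentParser(description="Prepare the MTT dataset")
parser.add_argument('--p_raw', required=True, help="root of the raw MTT audio (dirs 0-9 and a-f)")
parser.add_argument('--p_anno', required=True, help="annotations CSV")
parser.add_argument('--p_info', required=True, help="clip info file")
parser.add_argument('--p_out', required=True, help="output folder")
parser.add_argument('--delimiter', default='\t')
parser.add_argument('--n_topk', type=int, default=50)
parser.add_argument('--n_samples', type=int, default=59049)
parser.add_argument('--sr', type=int, default=16000)
parser.add_argument('--n_worker', type=int, default=4)

prepare_MTT_dataset(parser.parse_args())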