def __init__(self, config):
    super().__init__(config)
    self.epsilon = 1e-6
    hrnet_config = read_yaml(HRNET_CONFIG)
    self.encoder = get_pose_net(hrnet_config.MODEL36, True)
    self.layer_flattener = nn.Sequential(
        nn.Conv2d(42, 32, kernel_size=(8, 8), stride=8),
        nn.Flatten(),
    )
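# A minimal, self-contained shape check for the layer_flattener above. The
# 42-channel, 64x64 input is an assumed feature-map size (the real encoder
# output resolution is not shown in this snippet); the point is only how the
# strided 8x8 convolution plus Flatten collapses the map to a single vector.
import torch
import torch.nn as nn

layer_flattener = nn.Sequential(
    nn.Conv2d(42, 32, kernel_size=(8, 8), stride=8),
    nn.Flatten(),
)
dummy = torch.randn(1, 42, 64, 64)   # batch of 1, assumed 64x64 spatial size
print(layer_flattener(dummy).shape)  # torch.Size([1, 2048]) = 32 channels * 8 * 8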
def __initialize(self):
    sd_path = self.configs.symbol_dict.path + '.yml'
    emb_path = self.configs.embedding_table.path + '.parquet'
    logger.info('Loading symbol_dict from {}'.format(sd_path))
    self.sd = read_yaml(sd_path)
    logger.info('Loading emb_table from {}'.format(emb_path))
    self.emb = load_embedding(emb_path)
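# A minimal sketch of the config shape that __initialize above expects: it only
# needs `symbol_dict.path` and `embedding_table.path`, to which the '.yml' and
# '.parquet' suffixes are appended. The concrete paths below are illustrative
# assumptions, not the project's real config values.
from easydict import EasyDict as edict

configs = edict({
    'symbol_dict':     {'path': 'data/processed/symbol_dict'},      # -> 'data/processed/symbol_dict.yml'
    'embedding_table': {'path': 'data/processed/embedding_table'},  # -> 'data/processed/embedding_table.parquet'
})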
def __init__(self, config):
    super().__init__(config)
    self.epsilon = 1e-6
    hrnet_config = read_yaml(HRNET_CONFIG)
    self.encoder = get_pose_net(hrnet_config.MODEL36, True)
    # self.final_layers = nn.Sequential(
    #     nn.Conv2d(128, 64, kernel_size=(3, 3), stride=1),
    #     nn.BatchNorm2d(64),
    #     nn.ReLU(),
    #     nn.Conv2d(64, 42, kernel_size=(3, 3), stride=1),
    # )
    self.final_layers = nn.Sequential(
        nn.Conv2d(128, 64, kernel_size=(3, 3), stride=1),
        nn.Conv2d(64, 42, kernel_size=(3, 3), stride=1),
    )
    'fields': ['snap_time', 'open', 'close', 'high', 'low', 'ticker', 'source'],
    'data': [
        ['2017-01-01 22:30', 14.11, 14.12, 14.13, 14.11, 'BTC_ETH', 'poloniex'],
        ['2017-01-01 22:35', 14.12, 14.11, 14.14, 14.10, 'BTC_ETH', 'poloniex'],
        ['2017-01-01 22:40', None, None, None, None, 'BTC_ETH', 'poloniex'],
        ...
    ]
}
"""
from src.utils import DataCleaning, get_response, read_yaml, convert_to_epoch
import datetime
import logging as log

CONFIG = read_yaml('src/config.yaml')


def clean_data(data, fields):
    """Takes a list of dicts and cleans each field according to the cleaning
    functions specified in fields.

    Args:
        data (list): List of dicts, where each dict is {'field1': val1, ...}
        fields (dict): A dictionary that defines each field's cleaning function,
            along with any arguments required by that cleaning function. Fields
            must be in the following format:
            {
                'field1': {
                    'cleaning_func': 'func_name',
                    'args': {'arg1': val1, 'arg2'}
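# A hedged usage sketch for clean_data above, mirroring the fields format from
# its docstring. The rows are illustrative; 'to_float' and its 'precision'
# argument are hypothetical cleaning-function names, not ones defined in
# src.utils (convert_to_epoch is the only cleaner imported in this snippet).
example_rows = [
    {'snap_time': '2017-01-01 22:30', 'open': '14.11', 'ticker': 'BTC_ETH'},
    {'snap_time': '2017-01-01 22:35', 'open': None, 'ticker': 'BTC_ETH'},
]
example_fields = {
    'snap_time': {'cleaning_func': 'convert_to_epoch', 'args': {}},
    'open': {'cleaning_func': 'to_float', 'args': {'precision': 2}},
}
cleaned = clean_data(example_rows, example_fields)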
def __init__(self, config: edict):
    super().__init__(config)
    self.epsilon = 1e-6
    hrnet_config = read_yaml(HRNET_CONFIG)
    self.encoder = get_pose_net(hrnet_config.MODEL36, True)
import pandas as pd
import numpy as np
from src.utils import read_yaml, load_semeval15_laptop, search_keyword

pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)
pd.set_option('display.max_colwidth', 500)

corpus = load_semeval15_laptop('data/SemEval15_laptop/train.csv',
                               'data/SemEval15_laptop/test.csv')
vocab = read_yaml('data/SemEval15_laptop/glove_symdict.yml')

atlx = pd.read_csv('data/processed/lexicon_table_v2.csv', index_col='WORD')
atlx['ATLX'] = atlx.mean(axis=1).apply(lambda p: -1 if p < 0 else (0 if p == 0 else 1))
dalx = pd.read_csv('data/output/lexicon_table_dalx_07_thres0.7_C10.csv', index_col='WORD')

diff_df = atlx.join(dalx[['DALX']])
diff_df['INVOCAB'] = diff_df.index.isin(vocab)

# Words in S15 vocab
diff_df.INVOCAB.value_counts()  # 839

# Words that changed polarity by SVM
#   03    05_C10  05_C10_T.7
diff_df.loc[(diff_df.DALX.notna()) & (diff_df.ATLX != diff_df.DALX)]  # 1962  2058  308

# Words in S15 vocab but not changed polarity by SVM
#   03    05_C10  05_C10_T  05_C1_T
import sys
sys.path.append(r'P:\Projects\dalx')

import pandas as pd
import numpy as np
import streamlit as st
from src.utils import read_yaml, load_semeval15_laptop, search_keyword
import logging.config
import time

pd.set_option('display.width', 1000)

logging.config.dictConfig(read_yaml('logger_configs.yml'))
logger = logging.getLogger()
logger.info('----- Start annotator -----')

"""# SemEval15 Laptop Lexicon Annotator"""


@st.cache
def load_corpus_and_general_lexicon():
    train = 'data/SemEval15_laptop/train.csv'
    test = 'data/SemEval15_laptop/test.csv'
    logger.info(f'Loading SemEval15 Laptop corpus from: {train} {test}')
    corpus = load_semeval15_laptop(train, test)
    g_lx = pd.read_csv('data/processed/lexicon_table_v2.csv', index_col='WORD')
    return corpus, g_lx


def load_annotation_lexicon(path):
    logger.info(f'Loading lexicon from: {path}')
    a_df = pd.read_csv(path, index_col='WORD')
    n_notes = a_df.ANNOTATION.notna().sum()
    total_words = len(a_df)
import json
import os

from tqdm import tqdm

from src.url_functions import get_all_news_urls_from_topics_links, get_content_news_from_news_url
from src.utils import read_yaml

# read list of links for each topic
topics_links = read_yaml('src/links.yaml')

# get the list of links of news for each topic
print('Get the list of links of news for each topic')
topics_links = get_all_news_urls_from_topics_links(topics_links, n_pages_per_topic=1)

# the number of news links per topic
for k, v in topics_links.items():
    print(f'topic: {k} - No.samples: {len(v)}')

# set output path
OUTPUT = 'data/crawl_data'
os.makedirs(OUTPUT, exist_ok=False)

print('\nGet news content and save to storage')
for topic, links in topics_links.items():
    print(f'topic: {topic} - No.samples: {len(links)}')
    file_path = os.path.join(OUTPUT, f'{topic}.txt')
    with open(file_path, 'w') as f:
        for link in tqdm(links):
            s = get_content_news_from_news_url(link)