def __init__(self, config):
    super().__init__(config)
    self.epsilon = 1e-6
    # Build the HRNet pose encoder from its YAML config
    hrnet_config = read_yaml(HRNET_CONFIG)
    self.encoder = get_pose_net(hrnet_config.MODEL36, True)
    # Downsample and flatten the encoder output into a feature vector
    self.layer_flattener = nn.Sequential(
        nn.Conv2d(42, 32, kernel_size=(8, 8), stride=8), nn.Flatten())
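All of these snippets call a project-local read_yaml helper whose definition is not shown on this page. A minimal sketch of what such a helper might look like, assuming PyYAML and an attribute-style dict (easydict) so that accesses like hrnet_config.MODEL36 work:

# Sketch only: the project's actual read_yaml implementation is not shown in these examples.
import yaml
from easydict import EasyDict as edict

def read_yaml(path):
    """Load a YAML file and return it as an attribute-accessible dict."""
    with open(path, 'r', encoding='utf-8') as f:
        return edict(yaml.safe_load(f))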
Example #2
def __initialize(self):
    sd_path = self.configs.symbol_dict.path + '.yml'
    emb_path = self.configs.embedding_table.path + '.parquet'
    logger.info('Loading symbol_dict from {}'.format(sd_path))
    self.sd = read_yaml(sd_path)
    logger.info('Loading emb_table from {}'.format(emb_path))
    self.emb = load_embedding(emb_path)
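load_embedding is another project-local helper; a minimal sketch, assuming the embedding table is a plain parquet file readable with pandas (the column layout is not specified by the example above):

# Sketch only: assumes the embedding table is stored as a parquet file.
import pandas as pd

def load_embedding(path):
    """Read an embedding table stored as parquet."""
    return pd.read_parquet(path)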
Example #3
def __init__(self, config):
    super().__init__(config)
    self.epsilon = 1e-6
    hrnet_config = read_yaml(HRNET_CONFIG)
    self.encoder = get_pose_net(hrnet_config.MODEL36, True)
    # self.final_layers = nn.Sequential(
    #     nn.Conv2d(128, 64, kernel_size=(3, 3), stride=1),
    #     nn.BatchNorm2d(64),
    #     nn.ReLU(),
    #     nn.Conv2d(64, 42, kernel_size=(3, 3), stride=1),
    # )
    self.final_layers = nn.Sequential(
        nn.Conv2d(128, 64, kernel_size=(3, 3), stride=1),
        nn.Conv2d(64, 42, kernel_size=(3, 3), stride=1),
    )
Example #4
    'fields': ['snap_time', 'open', 'close', 'high', 'low', 'ticker', 'source'],
    'data': [
        ['2017-01-01 22:30', 14.11, 14.12, 14.13, 14.11, 'BTC_ETH', 'poloniex'],
        ['2017-01-01 22:35', 14.12, 14.11, 14.14, 14.10, 'BTC_ETH', 'poloniex'],
        ['2017-01-01 22:40', None, None, None, None, 'BTC_ETH', 'poloniex'],
        ...
    ]
}

"""

from src.utils import DataCleaning, get_response, read_yaml, convert_to_epoch
import datetime
import logging as log

CONFIG = read_yaml('src/config.yaml')


def clean_data(data, fields):
    """Takes a list of dicts and cleans each field
    according to the cleaning functions specified in fields.

    Args:
        data (list): List of dicts, where each dict is {'field1': val1, ...}
        fields (dict): A dictionary that defines each field's cleaning function,
            along with any arguments required by that cleaning function. Fields
            must be in the following format:
            {
                'field1': {
                    'cleaning_func': 'func_name',
                    'args': {'arg1':val1, 'arg2'}
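
A hedged usage sketch based on the format described in the docstring above; the field names and cleaning-function names are illustrative, not taken from the project:

# Illustrative only: the field spec below is an assumption, not the project's actual config.
fields = {
    'snap_time': {'cleaning_func': 'convert_to_epoch', 'args': {}},
    'close': {'cleaning_func': 'fill_missing', 'args': {'method': 'ffill'}},
}
rows = [{'snap_time': '2017-01-01 22:30', 'close': 14.12},
        {'snap_time': '2017-01-01 22:35', 'close': None}]
cleaned = clean_data(rows, fields)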
Example #5
def __init__(self, config: edict):
    super().__init__(config)
    self.epsilon = 1e-6
    hrnet_config = read_yaml(HRNET_CONFIG)
    self.encoder = get_pose_net(hrnet_config.MODEL36, True)
Example #6
import pandas as pd
import numpy as np
from src.utils import read_yaml, load_semeval15_laptop, search_keyword
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)
pd.set_option('display.max_colwidth', 500)


corpus = load_semeval15_laptop('data/SemEval15_laptop/train.csv', 'data/SemEval15_laptop/test.csv')

vocab = read_yaml('data/SemEval15_laptop/glove_symdict.yml')

atlx = pd.read_csv('data/processed/lexicon_table_v2.csv', index_col='WORD')
atlx['ATLX'] = atlx.mean(axis=1).apply(lambda p: -1 if p < 0 else (0 if p == 0 else 1))

dalx = pd.read_csv('data/output/lexicon_table_dalx_07_thres0.7_C10.csv', index_col='WORD')

diff_df = atlx.join(dalx[['DALX']])
diff_df['INVOCAB'] = diff_df.index.isin(vocab)



# Words in S15 vocab
diff_df.INVOCAB.value_counts() # 839


# Words that changed polarity by SVM                                 # 03   05_C10 05_C10_T.7
diff_df.loc[(diff_df.DALX.notna()) & (diff_df.ATLX != diff_df.DALX)] # 1962 2058   308

# Words in S15 vocab but not changed polarity by SVM                                               03  05_C10 05_C10_T 05_C1_T
Example #7
import sys
sys.path.append(r'P:\Projects\dalx')
import pandas as pd
import numpy as np
import streamlit as st
from src.utils import read_yaml, load_semeval15_laptop, search_keyword
import logging.config
pd.set_option('display.width', 1000)
import time


logging.config.dictConfig(read_yaml('logger_configs.yml'))
logger = logging.getLogger()
logger.info('----- Start annotator -----')

"""# SemEval15 Laptop Lexicon Annotator"""

@st.cache
def load_corpus_and_general_lexicon():
    train = 'data/SemEval15_laptop/train.csv'
    test = 'data/SemEval15_laptop/test.csv'
    logger.info(f'Loading SemEval15 Laptop corpus from: {train} {test}')
    corpus = load_semeval15_laptop(train, test)
    g_lx = pd.read_csv('data/processed/lexicon_table_v2.csv', index_col='WORD')
    return corpus, g_lx

def load_annotation_lexicon(path):
    logger.info(f'Loading lexicon from: {path}')
    a_df = pd.read_csv(path, index_col='WORD')
    n_notes = a_df.ANNOTATION.notna().sum()
    total_words = len(a_df)
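
A rough sketch of how these loaders might be wired into the Streamlit page; the widget layout and the search_keyword call are assumptions, not part of the original script:

# Hypothetical page wiring; not shown in the original annotator script.
corpus, g_lx = load_corpus_and_general_lexicon()
keyword = st.text_input('Search keyword')
if keyword:
    # search_keyword is imported above; its exact signature is assumed here
    st.write(search_keyword(corpus, keyword))
st.dataframe(g_lx.head(50))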
Example #8
import json
import os
from tqdm import tqdm

from src.url_functions import get_all_news_urls_from_topics_links, get_content_news_from_news_url
from src.utils import read_yaml

# read list of links for each topic
topics_links = read_yaml('src/links.yaml')

# get the list of links of news for each topic
print('Get the list of links of news for each topic')
topics_links = get_all_news_urls_from_topics_links(topics_links,
                                                   n_pages_per_topic=1)

# the number of news links per topic
for k, v in topics_links.items():
    print(f'topic: {k} - No.samples: {len(v)}')

# set output path
OUTPUT = 'data/crawl_data'
os.makedirs(OUTPUT, exist_ok=False)  # raises FileExistsError if the directory already exists

print('\nGet news content and save to storage')
for topic, links in topics_links.items():
    print(f'topic: {topic} - No.samples: {len(links)}')

    file_path = os.path.join(OUTPUT, f'{topic}.txt')
    with open(file_path, 'w') as f:
        for link in tqdm(links):
            s = get_content_news_from_news_url(link)