Example #1
0
    def _load(self, company_id):
        """Read the text and metadata files belonging to ``company_id``.

        Both files are looked up in the directory formed by joining the
        'ROOT_ORIGINAL' and 'FORMATTED_PR' configuration values.  The text
        file is named ``str(company_id)``; the metadata file name comes
        from ``common.get_list_file_name``.  Whatever ``self._load_text``
        returns is handed to ``self._load_meta`` together with the
        metadata path.
        """
        config = ConfigReader()
        base_dir = os.path.join(config.get('ROOT_ORIGINAL'),
                                config.get('FORMATTED_PR'))
        meta_name = common.get_list_file_name(company_id)

        text_path = os.path.join(base_dir, str(company_id))
        meta_path = os.path.join(base_dir, meta_name)

        loaded_text = self._load_text(text_path)
        self._load_meta(meta_path, loaded_text)
Example #2
0
 def get_linkpages(self):
     """Return the contents of the 'microsoft.html' source file.

     The file is looked up in the directory formed by joining the
     'ROOT_ORIGINAL' and 'PR_SOURCES' configuration values.

     Returns:
         A one-element list holding the whole file as a single string.
     """
     cfr = ConfigReader()
     source_dir = os.path.join(cfr.get('ROOT_ORIGINAL'),
                               cfr.get('PR_SOURCES'))
     path = os.path.join(source_dir, 'microsoft.html')
     # The parenthesised single-argument form prints the same text under
     # both the Python 2 print statement and the Python 3 print function.
     print('collecting links from source file')
     with open(path) as f:
         # read() yields the same string as joining the result of
         # readlines(), without building the intermediate list.
         return [f.read()]
Example #3
0
 def _load_nonpubs(self):
     """Fill ``self._nonpubs`` from the file named by the 'NONPUBS' setting.

     Every line is stripped of surrounding whitespace and lower-cased;
     lines that are empty after stripping are skipped.  The attribute is
     reset to an empty set before the file is opened.
     """
     self._nonpubs = set()
     nonpubs_path = os.path.abspath(ConfigReader().get('NONPUBS'))
     with open(nonpubs_path) as handle:
         cleaned = (raw.strip().lower() for raw in handle.readlines())
         self._nonpubs.update(entry for entry in cleaned if entry)
Example #4
0
 def _load_path(self):
     """Store this company's directory path in ``self._path_dir``.

     The path is the 'ROOT_ORIGINAL' and 'DOWNLOADED_PR' configuration
     values joined together, followed by ``self._company_id``.
     """
     config = ConfigReader()
     downloads_dir = os.path.join(config.get('ROOT_ORIGINAL'),
                                  config.get('DOWNLOADED_PR'))
     self._path_dir = os.path.join(downloads_dir, self._company_id)
Example #5
0
def get_pickled_matches_path(company_id, subset_name):
    """Return ``<PICKLED_MATCHES>/<subset_name>/<company_id>.pickle``.

    ``<PICKLED_MATCHES>`` stands for the configuration value stored under
    the 'PICKLED_MATCHES' key.
    """
    subset_dir = os.path.join(ConfigReader().get('PICKLED_MATCHES'),
                              subset_name)
    return os.path.join(subset_dir, '{0}.pickle'.format(company_id))
Example #6
0
def get_pickled_pr_tokens_path(company_id):
    """Return ``<PICKLED_TOKENS_PR>/<company_id>.pickle``.

    ``<PICKLED_TOKENS_PR>`` stands for the configuration value stored
    under the 'PICKLED_TOKENS_PR' key.
    """
    tokens_dir = ConfigReader().get('PICKLED_TOKENS_PR')
    pickle_name = '{0}.pickle'.format(company_id)
    return os.path.join(tokens_dir, pickle_name)
Example #7
0
def get_sentiment_scores_path(company_id):
    """Return ``str(company_id)`` joined onto the 'SENTIMENT_SCORES' directory.

    The directory is read from the configuration under that key.
    """
    scores_dir = ConfigReader().get('SENTIMENT_SCORES')
    return os.path.join(scores_dir, str(company_id))
Example #8
0
def get_subjlexicon_path(stemmed=False):
    """Return the configured 'SUBJLEXICON' or 'SUBJLEXICON-STEMMED' value.

    Args:
        stemmed: When truthy the 'SUBJLEXICON-STEMMED' setting is
            returned, otherwise the 'SUBJLEXICON' setting.
    """
    config = ConfigReader()
    key = 'SUBJLEXICON-STEMMED' if stemmed else 'SUBJLEXICON'
    return config.get(key)
Example #9
0
def get_quotes_path(company_id):
    """Return the formatted ``company_id`` joined onto the 'QUOTES' directory.

    The directory is read from the configuration under that key.
    """
    quotes_dir = ConfigReader().get('QUOTES')
    return os.path.join(quotes_dir, '{0}'.format(company_id))
Example #10
0
def get_pairs_path(pairs_name):
    """Return ``pairs_name`` joined onto the configured 'PAIRS' directory."""
    pairs_dir = ConfigReader().get('PAIRS')
    return os.path.join(pairs_dir, pairs_name)
Example #11
0
def get_blocks_path(blocks_name):
    """Return ``blocks_name`` joined onto the configured 'BLOCKS' directory."""
    blocks_dir = ConfigReader().get('BLOCKS')
    return os.path.join(blocks_dir, blocks_name)
Example #12
0
def get_art_duplicates_path(company_id):
    """Return ``str(company_id)`` joined onto the 'DUPLICATES_ART' directory.

    The directory is read from the configuration under that key.
    """
    duplicates_dir = ConfigReader().get('DUPLICATES_ART')
    return os.path.join(duplicates_dir, str(company_id))
Example #13
0
def get_text_path(text_name, company_id):
    """Return ``<TEXT>/<text_name>/<company_id>``.

    ``<TEXT>`` stands for the configuration value stored under the 'TEXT'
    key; ``company_id`` is converted with ``str``.
    """
    text_root = ConfigReader().get('TEXT')
    named_dir = os.path.join(text_root, text_name)
    return os.path.join(named_dir, str(company_id))
Example #14
0
def get_sents_path(sents_name, company_id):
    """Return ``<SENTS>/<sents_name>/<company_id>``.

    ``<SENTS>`` stands for the configuration value stored under the
    'SENTS' key; ``company_id`` is converted with ``str``.
    """
    sents_root = ConfigReader().get('SENTS')
    named_dir = os.path.join(sents_root, sents_name)
    return os.path.join(named_dir, str(company_id))
Example #15
0
def get_postags_path():
    """Return the configuration value stored under the 'POSTAGS' key."""
    return ConfigReader().get('POSTAGS')
Example #16
0
 def _get_input_path(self):
     """Return this company's directory under the downloaded-news location.

     The result is the 'ROOT_ORIGINAL' and 'DOWNLOADED_NEWS' configuration
     values joined together, followed by ``self._company_id``.
     """
     config = ConfigReader()
     news_dir = os.path.join(config.get('ROOT_ORIGINAL'),
                             config.get('DOWNLOADED_NEWS'))
     return os.path.join(news_dir, self._company_id)
Example #17
0
def get_pickled_subset_path(subset_name, doctype):
    """Return ``<PICKLED_SUBSETS>/<subset_name>/<doctype>.pickle``.

    ``<PICKLED_SUBSETS>`` stands for the configuration value stored under
    the 'PICKLED_SUBSETS' key.
    """
    subset_dir = os.path.join(ConfigReader().get('PICKLED_SUBSETS'),
                              subset_name)
    return os.path.join(subset_dir, '{0}.pickle'.format(doctype))
Example #18
0
def get_sentiment_words_neg_path(company_id):
    """Return ``str(company_id)`` joined onto 'SENTIMENT_WORDS_NEG'.

    The directory is read from the configuration under that key.
    """
    neg_words_dir = ConfigReader().get('SENTIMENT_WORDS_NEG')
    return os.path.join(neg_words_dir, str(company_id))