Beispiel #1
0
    def __init__(self, config_path):
        """Read the configuration file and initialise the instance state.

        Both ``sys.stdout`` and ``sys.stderr`` are replaced by ``Unbuffered``
        wrappers, the file at ``config_path`` is parsed, and the user
        database, whitelist and blacklist are built from values looked up in
        the ``Main`` section through ``get_param_str``.

        :param config_path: path of the configuration file to read.
        :raises ValueError: if nothing exists at ``config_path``.
        """
        config_exists = os.path.exists(config_path)
        if not config_exists:
            raise ValueError('Can\'t find config file "' + config_path + '"')

        # Wrap the standard streams, stdout first and stderr second.
        for stream_name in ('stdout', 'stderr'):
            setattr(sys, stream_name, Unbuffered(getattr(sys, stream_name)))

        # The parser is stored on the instance before reading so that the
        # get_param_str lookups below can use it.
        parser = ConfigParser()
        self.__config = parser
        parser.read(config_path)

        self.__active_mails_count = 0
        self.__send_lock = Lock()

        access_file = self.get_param_str('Main', 'ACCESS_FILE_PATH')
        self.__users_db = Users(access_file)

        whitelist_value = self.get_param_str('Main', 'WHITELIST_SENDERS')
        self.__whitelist = FilterList(whitelist_value)

        blacklist_value = self.get_param_str('Main', 'BLACKLIST_SENDERS')
        self.__blacklist = FilterList(blacklist_value)
Beispiel #2
0
from __future__ import unicode_literals
import sys
import attr
from config import config
import spacy
# nltk.download('wordnet')  # cancel this comment for the first run
from nltk.corpus import wordnet as wn
from get_NE_list import NE_list
import numpy as np
from unbuffered import Unbuffered
from itertools import combinations
from functools import partial
from nltk import ngrams
from collections import Counter

# Route stdout through the project's Unbuffered wrapper for the whole process.
sys.stdout = Unbuffered(sys.stdout)
# Load the spaCy model 'en_core_web_sm' once, at import time.
nlp = spacy.load('en_core_web_sm')

# Load the bigram candidates; change the path if another dataset is used.
bigram_candidate = np.load('bigram/bigram_syn_agnews.npy')
# Turn every row of the loaded array into a plain Python list.
bigram_candidate_list = [list(row) for row in bigram_candidate]
# Keep the first element of each row.
# NOTE(review): presumably this is the bigram that has synonyms - confirm
# against whatever produced the .npy file.
bigrams_have_syns = [row[0] for row in bigram_candidate_list]

supported_pos_tags = [
    'CC',  # coordinating conjunction, like "and but neither versus whether yet so"
    # 'CD',   # Cardinal number, like "mid-1890 34 forty-two million dozen"
    # 'DT',   # Determiner, like all "an both those"
    # 'EX',   # Existential there, like "there"
        entrydict['avetime'] = coursesoup.findAll('strong')[9].parent.findAll('td')[5].string.encode('ascii','ignore')


    # print output along with id number when:
    # - not every char in avetime is a num or '.'
    # - raise BadStatusLine(line)
    #   - depending on what this error means, the best action might be to
    #     relogin and try downloading it again

    # I ended up just printing it every time (why not?) but I should still check out that error

    return entrydict

def entryGen(start, end):
    """Lazily yield ``makeEntry(i)`` for every id in ``[start, end)``.

    :param start: first id to process (inclusive).
    :param end: id at which to stop (exclusive).
    :return: a generator producing one ``makeEntry`` result per id, in
        ascending id order.
    """
    # Ids come straight from xrange, so they are always integers and need no
    # None check before being handed to makeEntry.
    for entry_id in xrange(start, end):
        yield makeEntry(entry_id)
# old starting indices
# 886
# 906
# 30429
# Write every entry to the file named by the first command-line argument.
# The `with` block closes the file when the loop finishes or when an error
# interrupts it; a bare `out.closed` only reads an attribute and closes
# nothing.
with open(sys.argv[1], 'w') as raw_out:
    # Wrap the file so output is written after every new entry is received,
    # in case of early termination.
    # NOTE(review): relies on Unbuffered flushing on each write - confirm
    # against its definition.
    out = Unbuffered(raw_out)
    out.write('---\n')
    for e in entryGen(6636, 30429):
        yamline = yaml.dump(e)
        # Each dumped entry is emitted as one item of a top-level YAML list.
        out.write('- {0:s}'.format(yamline))