def __init__(self, config_path):
    """Set up the instance from the configuration file at *config_path*.

    Verifies that the file exists, swaps ``sys.stdout`` / ``sys.stderr`` for
    ``Unbuffered`` wrappers, parses the file with ``ConfigParser`` and then
    builds the user database and the sender white/black lists from the
    ``Main`` section (``ACCESS_FILE_PATH``, ``WHITELIST_SENDERS`` and
    ``BLACKLIST_SENDERS``).

    :param config_path: path of the configuration file to read.
    :raises ValueError: if nothing exists at *config_path*.
    """
    config_missing = not os.path.exists(config_path)
    if config_missing:
        message = "Can't find config file \"" + config_path + "\""
        raise ValueError(message)

    # NOTE(review): the original carried a stray "#if" comment right before
    # these two statements; they are treated as live code here -- confirm.
    sys.stdout = Unbuffered(sys.stdout)
    sys.stderr = Unbuffered(sys.stderr)

    # The configuration must be loaded before any get_param_str() call below.
    parser = ConfigParser()
    self.__config = parser
    parser.read(config_path)

    # Counter starts at zero; the lock is created here for later use.
    self.__active_mails_count = 0
    self.__send_lock = Lock()

    access_file_path = self.get_param_str('Main', 'ACCESS_FILE_PATH')
    self.__users_db = Users(access_file_path)

    whitelist_senders = self.get_param_str('Main', 'WHITELIST_SENDERS')
    self.__whitelist = FilterList(whitelist_senders)

    blacklist_senders = self.get_param_str('Main', 'BLACKLIST_SENDERS')
    self.__blacklist = FilterList(blacklist_senders)
from __future__ import unicode_literals import sys import attr from config import config import spacy # nltk.download('wordnet') # cancel this comment for the first run from nltk.corpus import wordnet as wn from get_NE_list import NE_list import numpy as np from unbuffered import Unbuffered from itertools import combinations from functools import partial from nltk import ngrams from collections import Counter sys.stdout = Unbuffered(sys.stdout) nlp = spacy.load('en_core_web_sm') # load bigram_candidates bigram_candidate = np.load( 'bigram/bigram_syn_agnews.npy') # change the path if use other dataset bigram_candidate_list = [] for i in range(len(bigram_candidate)): bigram_candidate_list.append(list(bigram_candidate[i])) bigrams_have_syns = [item[0] for item in bigram_candidate_list] supported_pos_tags = [ 'CC', # coordinating conjunction, like "and but neither versus whether yet so" # 'CD', # Cardinal number, like "mid-1890 34 forty-two million dozen" # 'DT', # Determiner, like all "an both those" # 'EX', # Existential there, like "there"
entrydict['avetime'] = coursesoup.findAll('strong')[9].parent.findAll('td')[5].string.encode('ascii','ignore') # print output along with id number when: # - not every char in avetime is a num or '.' # - raise BadStatusLine(line) # - depending on what this error means, the best action might be to # relogin and try downloading it again # I ended up just printing it every time (why not?) but I should still check out that error return entrydict def entryGen(start, end): for i in xrange(start,end): if i is not None: yield makeEntry(i) # old starting indices # 886 # 906 # 30429 out = open(sys.argv[1], 'w') # set it to write after every new entry is received in case of early termination out = Unbuffered(out) out.write('---\n') for e in entryGen(6636,30429): yamline = yaml.dump(e) out.write('- {0:s}'.format(yamline)) #print yamline out.closed