class Container():
    """Hash-addressed store of significance scorers.

    An identifier is reduced to one bucket code per entry of
    ``hash_available`` (each code is the hash of ``repr(_id)`` modulo
    ``2**_BIT_COUNT``); every bucket holds one scorer object that is created
    the first time the bucket is requested.
    """

    # Cleaning parameters read from the [detection] config section when the
    # class body executes. Nothing in the visible part of the class uses them.
    # NOTE(review): eval() executes the raw config text as Python, so the
    # config file has to be trusted.
    _THRESHOLD_FOR_CLEANING = eval(
        parse_config.get('detection', 'threshold_for_cleaning'))
    _CAPACITY_FOR_CLEANING = eval(
        parse_config.get('detection', 'capacity_for_cleaning'))

    def __init__(self):
        """Start with no buckets allocated."""
        self.container = {}

    def get(self, _id, _timestamp):
        """Return the scorers for ``_id`` together with a hit/miss record.

        ``_timestamp`` is accepted but not used by this method.

        Returns a pair ``(sig_scores, record)`` with one entry per hash in
        ``hash_available``: ``sig_scores`` holds the scorer of each bucket,
        ``record`` holds the bucket code when the bucket already existed and
        ``0`` when it was created by this call.
        """
        modulus = 2**_BIT_COUNT
        # All bucket codes are computed before the store is touched.
        bucket_codes = [
            get_hash(make_hasher(), repr(_id)) % modulus
            for make_hasher in hash_available
        ]

        scorers = []
        hit_record = []
        for bucket in bucket_codes:
            known = bucket in self.container
            if not known:
                # Only the 's' scorer type is constructed here. NOTE(review):
                # for any other _SIGNI_TYPE ``scorer`` is unbound (or left over
                # from a previous bucket) -- confirm 's' is the only value used.
                if _SIGNI_TYPE == 's':
                    scorer = fast_signi.SignificanceScorer()
                self.container[bucket] = scorer
            scorers.append(self.container[bucket])
            # NOTE(review): a miss is recorded as 0, which is also a possible
            # bucket code.
            hit_record.append(bucket if known else 0)
        return scorers, hit_record
def process(self, sig_instance, sig_list=None):
    """Attach a significance instance to a thread, opening a new one if needed.

    ``sig_instance`` must unpack into exactly six items; the first is compared
    against ``self.start_time`` / ``self.end_time`` and the fifth is the
    significance value that may be returned. ``sig_list`` is only used for
    debug printing.

    Returns the instance's significance value when a new thread had to be
    created for it. Returns ``0.`` when the instance lies outside the
    configured time window or when an existing thread accepted it.
    """
    moment, _, _, _, score, _ = sig_instance
    if moment < self.start_time or moment > self.end_time:
        return 0.

    # Optional dump of the accompanying list, enabled by the [output]
    # "debug_info" option (evaluated on every call).
    if eval(parse_config.get('output', 'debug_info')) and sig_list:
        separator = '-----------------------'
        print(separator)
        for entry in sig_list:
            print(('__sig__', entry))
        print(separator)

    # Offer the instance to each thread in order; stop at the first taker.
    if any(slot.add_to_thread(sig_instance) for slot in self.threads):
        return 0.

    fresh = Slice()
    fresh.new_thread(sig_instance)
    self.threads.append(fresh)
    return score
def __init__(self, _stream):
    """Bind this object to a tweet stream and load its settings.

    Reads the [significance] options ``window_size``, ``cycle`` and
    ``average`` and passes them to
    ``fast_signi.SignificanceScorer.set_window_size``; creates a
    ``SigniProcessor``; and parses the [detection] ``start_time`` /
    ``end_time`` options (format ``YYYY-MM-DD HH:MM:SS``) into datetimes.
    """
    self.stream = _stream
    self.threads = []

    # eval() turns the raw option text into Python values.
    # NOTE(review): this executes config content, so the file must be trusted.
    _window_size = eval(parse_config.get('significance', 'window_size'))
    _cycle = eval(parse_config.get('significance', 'cycle'))
    _average = eval(parse_config.get('significance', 'average'))
    fast_signi.SignificanceScorer.set_window_size(
        _window_size, _cycle, _average)

    window_start = parse_config.get('detection', 'start_time')
    window_end = parse_config.get('detection', 'end_time')
    self.processor = signi_processor.SigniProcessor()

    stamp_format = '%Y-%m-%d %H:%M:%S'
    self.start_time = datetime.strptime(window_start, stamp_format)
    self.end_time = datetime.strptime(window_end, stamp_format)
def __next__(self):
    """Pull the next tweet from the stream and run it through detection.

    Returns ``stream.End_Of_Stream`` or ``None`` unchanged when the upstream
    iterator yields one of them. Otherwise returns ``(ptweet, value)`` where
    ``value`` is the result of ``self.process`` for the tweet's significance
    instance, or ``0.0`` when the processor produced no instance.
    """
    ptweet = next(self.stream)
    if ptweet is stream.End_Of_Stream or ptweet is None:
        # Sentinel values are handed to the caller untouched.
        return ptweet

    sig_instance, sig_list = self.processor.process(ptweet)
    if sig_instance is None:
        return ptweet, 0.0

    output = self.process(sig_instance, sig_list)
    # The [output] "debug_info" option is evaluated on every call.
    if eval(parse_config.get('output', 'debug_info')):
        print(sig_instance)
    return ptweet, output
async def get_game_time(self, game_id: str, context: Any) -> GameTime:
    """Build the ``GameTime`` record reported for ``game_id``.

    The played time is whatever ``config.get("minutes")`` returns and the
    last-played value is a fixed constant; ``context`` is not used.
    """
    # Fixed placeholder. Read as Unix epoch seconds it is
    # 2018-01-01 01:00:00 UTC.
    # NOTE(review): presumably GameTime expects an epoch timestamp -- confirm.
    fixed_last_played = 1514768400
    return GameTime(game_id, config.get("minutes"), fixed_last_played)
import stream
from parse_config import config as parse_config
import signi_processor
from datetime import datetime, timedelta
import fast_signi

# [detection] "thread_gap" option, evaluated once at import time. It is not
# referenced by the code visible in this part of the module.
# NOTE(review): eval() executes the raw config text as Python.
_THREAD_GAP = eval(parse_config.get('detection', 'thread_gap'))


class Slice:
    """A thread of significance instances.

    Tracks the first and latest timestamp, the keyword set, the first and
    current significance value, and the list of instances added so far.
    """

    def __init__(self):
        """Create an empty thread with zeroed times and scores."""
        self.start = self.end = 0.0
        self.keywords = None
        self.sig = self.first_sig = 0.0
        self.thread = []
        self.first_keywords = None

    def new_thread(self, sig_instance):
        """Seed this thread from its first significance instance.

        ``sig_instance`` must unpack into exactly six items: the first is the
        timestamp, the fifth the significance value and the sixth an indexable
        holding at least two keywords.
        """
        when, _, _, _, score, pair = sig_instance
        self.start = self.end = when

        head_kw, tail_kw = pair[0], pair[1]
        self.keywords = {head_kw, tail_kw}
        self.thread.append(sig_instance)

        self.first_sig = self.sig = score
        # Comma-joined label of the two seed keywords.
        self.first_keywords = head_kw + ',' + tail_kw
from nltk.stem.lancaster import *
from nltk.stem.snowball import *
from nltk.stem.porter import *
from parse_config import config

# Stemmer chosen by the [pre_process] "stemmer" option. It stays None when the
# option is none of 'Snowball', 'Porter' or 'Lancaster'.
stemmer = None
_stemmer_name = config.get('pre_process', 'stemmer')
if _stemmer_name == 'Snowball':
    stemmer = SnowballStemmer("english")
elif _stemmer_name == 'Porter':
    stemmer = PorterStemmer()
elif _stemmer_name == 'Lancaster':
    stemmer = LancasterStemmer()


def stem(word):
    """Return the stem of ``word``, falling back to ``word`` itself.

    When no stemmer is configured the function prints ``none`` and returns
    the word unchanged. When the configured stemmer raises, the word is
    returned unchanged as well.
    """
    if stemmer is None:
        print('none')
        return word
    try:
        return str(stemmer.stem(word))
    except Exception:
        # Best effort: an unstemmable token is passed through as-is. Catching
        # Exception rather than using a bare ``except`` lets KeyboardInterrupt
        # and SystemExit propagate instead of being swallowed here.
        return word
from parse_config import config as parse_config
import fast_signi
import stemmer
import hashlib

# Settings from the [detection] config section, read once at import time.
# NOTE(review): the eval() calls execute the raw config text as Python, so the
# config file has to be trusted.
_SIGNI_THRESHOLD = eval(parse_config.get('detection', 'detection_threshold'))
_SIGNI_TYPE = parse_config.get('detection', 'detection_signi_type')
_BIT_COUNT = eval(parse_config.get('detection', 'bit_count'))

# Hash constructors in use. A wider set is kept below for reference:
# hash_available=[hashlib.md5, hashlib.sha1, hashlib.sha224, hashlib.sha256, hashlib.sha384,hashlib.sha512]
hash_available = [hashlib.md5]


def get_hash(hash_function, x: str) -> int:
    """Return the digest of ``x`` under ``hash_function`` as an integer.

    ``x`` is encoded with ``str.encode()`` defaults and fed into
    ``hash_function`` via ``update``; the resulting digest bytes are read as a
    big-endian unsigned integer. The hash object is updated in place, so pass
    a fresh one to hash ``x`` on its own.
    """
    hash_function.update(x.encode())
    return int.from_bytes(hash_function.digest(), byteorder="big")


class Container():
    # Cleaning parameters read from the [detection] config section when the
    # class body executes; they are not used in the visible part of the class.
    _THRESHOLD_FOR_CLEANING = eval(
        parse_config.get('detection', 'threshold_for_cleaning'))
    _CAPACITY_FOR_CLEANING = eval(
        parse_config.get('detection', 'capacity_for_cleaning'))

    def __init__(self):
        # Dictionary backing the container; starts empty.
        self.container = {}