def __init__(self):
    """Set up the controller's initial state.

    The data aggregator is created eagerly; the UI members stay None
    until they are wired up later.
    """
    self.__aData = DataAggregator()
    # UI components, populated later.
    self.__carousel = None
    self.__header = None
    self.__cart = None
    # Saved hangar camera location, captured later so it can be restored.
    self.__hangarCameraLocation = None
def _execute_checkers(self):
    """Run the checkers for mismatched data over ``self.df``.

    The missing-value pass replaces ``self.df``; the aggregation pass is
    then run on the updated frame.
    """
    template_handler = TrueTemplateHandler(self.temp_name)
    missing_checker = MissingValChecker(self.temp_name)
    aggregator = DataAggregator(self.temp_name)
    self.df = missing_checker.execute(self.df, template_handler)
    # NOTE(review): the aggregated frame is computed but never stored back
    # onto self.df — confirm whether that is intentional.
    new_df = aggregator.execute(self.df, template_handler)
class Controller(object):
    """Customization controller: owns the data aggregator, the carousel
    widget and a saved hangar camera location for later restore."""

    def __init__(self):
        self.__aData = DataAggregator()
        self.__carousel = None
        self.__header = None
        self.__cart = None
        self.__hangarCameraLocation = None

    def init(self):
        """Initialise data, build the carousel and move the hangar camera
        to the preview position, remembering where it was."""
        self.__aData.init()
        self.__carousel = Carousel(self.__aData)
        space = g_hangarSpace.space
        self.__hangarCameraLocation = space.getCameraLocation()
        space.locateCameraToPreview()
        g_hangarSpace.onSpaceCreate += self.__onHangarSpaceCreate

    def fini(self):
        """Tear down owned components and unsubscribe from hangar events."""
        self.__carousel.fini()
        self.__aData.fini()
        self.__carousel = None
        self.__header = None
        g_hangarSpace.onSpaceCreate -= self.__onHangarSpaceCreate

    def updateTank3DModel(self, isReset=False):
        """Refresh the 3D vehicle model; on reset, restore the saved camera."""
        source = self.__aData.initialViewModel if isReset else self.__aData.viewModel
        camouflageID = source[0]
        stickerData = source[1:3]
        space = g_hangarSpace.space
        if space is None:
            return
        space.updateVehicleCamouflage(camouflageID=camouflageID)
        space.updateVehicleSticker(stickerData)
        if isReset and self.__hangarCameraLocation is not None:
            space.setCameraLocation(**self.__hangarCameraLocation)
        else:
            space.locateCameraToPreview()
        space.clearSelectedEmblemInfo()

    @property
    def carousel(self):
        return self.__carousel

    @property
    def dataAggregator(self):
        return self.__aData

    def __onHangarSpaceCreate(self):
        """Capture the camera location once the hangar space exists."""
        space = g_hangarSpace.space
        if space is not None:
            self.__hangarCameraLocation = space.getCameraLocation()
class Controller(object):
    """Customization controller owning the aggregator, the carousel and
    the remembered hangar camera location."""

    def __init__(self):
        self.__aData = DataAggregator()
        self.__carousel = None
        self.__header = None
        self.__cart = None
        self.__hangarCameraLocation = None

    def init(self):
        """Initialise the aggregator/carousel and aim the camera at preview."""
        self.__aData.init()
        self.__carousel = Carousel(self.__aData)
        self.__hangarCameraLocation = g_hangarSpace.space.getCameraLocation()
        g_hangarSpace.space.locateCameraToPreview()
        g_hangarSpace.onSpaceCreate += self.__onHangarSpaceCreate

    def fini(self):
        """Finalise owned components and drop the space-created handler."""
        self.__carousel.fini()
        self.__aData.fini()
        self.__carousel = None
        self.__header = None
        g_hangarSpace.onSpaceCreate -= self.__onHangarSpaceCreate

    def updateTank3DModel(self, isReset=False):
        """Push camouflage/sticker data to the hangar vehicle model.

        When resetting, the initial view model and the saved camera
        location are used; otherwise the current view model is shown.
        """
        if isReset:
            model = self.__aData.initialViewModel
        else:
            model = self.__aData.viewModel
        camouflageIDToSet = model[0]
        newViewData = model[1:3]
        if g_hangarSpace.space is not None:
            hangarSpace = g_hangarSpace.space
            hangarSpace.updateVehicleCamouflage(camouflageID=camouflageIDToSet)
            hangarSpace.updateVehicleSticker(newViewData)
            if self.__hangarCameraLocation is not None and isReset:
                hangarSpace.setCameraLocation(**self.__hangarCameraLocation)
            else:
                hangarSpace.locateCameraToPreview()
            hangarSpace.clearSelectedEmblemInfo()

    @property
    def carousel(self):
        return self.__carousel

    @property
    def dataAggregator(self):
        return self.__aData

    def __onHangarSpaceCreate(self):
        """Remember the camera location of the freshly created space."""
        if g_hangarSpace.space is not None:
            self.__hangarCameraLocation = g_hangarSpace.space.getCameraLocation()
def init(args):
    """Build the simulation context from command-line *args*.

    Loads config, datacenter and topology, then pushes the relevant
    parameters into the Comm, Exec, Query and DataAggregator modules.
    Returns the (conf, dc, topo) triple.
    """
    conf = init_conf(args)
    dc = load_dc(conf)
    # read_topo and write_topo_and_done are mutually exclusive modes.
    assert not (conf['read_topo'] and conf['write_topo_and_done']), \
        "Both read_topo and write_topo_and_done cannot be True!"
    topo = load_topo(conf, dc)
    sigmas, omegas = conf['sigmas'], conf['omegas']
    Comm.set_params(dc, sigmas[0], sigmas[1], omegas[0], omegas[1], omegas[2])
    betas, gammas, thetas = conf['betas'], conf['gammas'], conf['thetas']
    Exec.set_params(dc, betas[0], betas[1], gammas[0], gammas[1],
                    thetas[0], thetas[1], conf['lambda_ms'] / 1000)
    Query.set_params(conf['query_req_bytes'], conf['query_resp_bytes'])
    DataAggregator.set_params(conf, dc, topo)
    return conf, dc, topo
def main():
    """Aggregate static game data and dump items/champions to JSON files.

    Side effects: overwrites ../items.json and ../champions.json.
    """
    data = DataAggregator.create()
    with open('../items.json', 'w') as items_output:
        items_output.write(Util.json_dump(data.get_items()))
    # Parenthesized, single-argument print keeps this chunk runnable under
    # both Python 2 and Python 3 (the old `print 'x'` is a Py3 SyntaxError).
    print('Successfully wrote items.json')
    with open('../champions.json', 'w') as champions_output:
        champions_output.write(Util.json_dump(data.get_champions()))
    print('Successfully wrote champions.json')
class Controller(object):
    """Customization controller; all members are created lazily in init()."""

    def __init__(self):
        self.__aData = None
        self.__carousel = None
        self.__header = None
        self.__cart = None
        self.__hangarCameraLocation = None

    def init(self):
        """Create the aggregator/carousel and aim the camera at preview,
        remembering the previous camera location."""
        self.__aData = DataAggregator()
        self.__carousel = Carousel(self.__aData)
        self.__hangarCameraLocation = g_hangarSpace.space.getCameraLocation()
        g_hangarSpace.space.locateCameraToPreview()

    def fini(self):
        """Finalise and release the owned components."""
        self.__carousel.fini()
        self.__aData.fini()
        self.__aData = None
        self.__carousel = None
        self.__header = None

    def updateTank3DModel(self, isReset=False):
        """Push the installed camouflage and sticker data to the vehicle.

        NOTE(review): the reset branch passes the whole initial view model
        as sticker data while the normal branch slices [1:3] — confirm
        this asymmetry is intentional.
        """
        if isReset:
            stickerData = self.__aData.initialViewModel
        else:
            stickerData = self.__aData.viewModel[1:3]
        space = g_hangarSpace.space
        if space is None:
            return
        installedCamo = self.__aData.installed[CUSTOMIZATION_TYPE.CAMOUFLAGE][0]
        space.updateVehicleCamouflage(camouflageID=installedCamo.getID())
        space.updateVehicleSticker(stickerData)
        if self.__hangarCameraLocation is not None and isReset:
            space.setCameraLocation(**self.__hangarCameraLocation)
        else:
            space.locateCameraToPreview()
        space.clearSelectedEmblemInfo()

    @property
    def carousel(self):
        return self.__carousel
def aggregate_data(conf):
    """Simulate data aggregation and print timing / cost statistics.

    In verbose mode a multi-line human-readable report is printed;
    otherwise a single 'DataAggrResults:' CSV-style line is emitted for
    downstream parsing.
    """
    print('Simulating data aggregation...')
    results = DataAggregator.aggregate(conf['levels'])
    tx_stats = DataAggregator.get_tx_stats(1e9)  # Gbytes
    mh_topo = DataAggregator.get_vm_hours_topo()
    mh_dc = DataAggregator.get_vm_hours_dc()
    total_aggr_time = sum(result.aggr_time for result in results)
    alloc_policy = conf['machine']['alloc_policy']
    if conf['verbose']:
        print('Total Aggregation Time: {:.5f} s'.format(total_aggr_time))
        print('mapping={}, s_m={}, s_r={}, sensors_per_person={}, alloc_policy={}'
              .format(conf['mapping'], conf['s_m'], conf['s_r'],
                      conf['sensors_per_person'], alloc_policy))
        for result in results:
            print(result)
        print("Tx Data (mobile/WAN/LAN) = {} Gbytes".format(tx_stats))
        print("VM hours per logical level = {}, {}".format(
            mh_topo, sum(mh_topo)))
        print("VM hours per cluster level = {}, {}".format(mh_dc, sum(mh_dc)))
    else:
        l = [
            '{}, {}, {}, {}'.format(conf['mapping'], conf['s_m'],
                                    conf['s_r'], alloc_policy)
        ]
        # Fixed: was '{:5f}' (field width 5, default 6-digit precision);
        # '{:.5f}' matches the 5-decimal precision of every other column.
        l += ['{:.5f}'.format(total_aggr_time)]
        l += [result.to_csv() for result in results]
        l += [
            '{:.5f}, {:.5f}, {:.5f}'.format(tx_stats[0], tx_stats[1],
                                            tx_stats[2])
        ]
        l += [
            '{:.5f}, {:.5f}, {:.5f}, {:.5f}, {:.5f}'.format(
                mh_topo[0], mh_topo[1], mh_topo[2], mh_topo[3], sum(mh_topo))
        ]
        l += [
            '{:.5f}, {:.5f}, {:.5f}, {:.5f}, {:.5f}'.format(
                mh_dc[0], mh_dc[1], mh_dc[2], mh_dc[3], sum(mh_dc))
        ]
        print('DataAggrResults: {}'.format(', '.join(l)))
def main():
    """Seed the stats file from the configured list of summoners.

    Appends build stats to ../stats.json and records which summoner IDs
    have been queried (seed ids) and which remain unqueried (result ids).
    """
    data = DataAggregator.create()
    seed_ids = data.get_summoner_ids(SEEDING_SUMMONERS)
    stats, result_ids = data.get_build_data(seed_ids)
    with open('../stats.json', 'a') as stats_output:
        stats_output.writelines('%s\n' % json.dumps(stat) for stat in stats)
    with open('../QUERIED_SUMMONERS', 'w') as queried_output:
        queried_output.writelines('%s\n' % sid for sid in seed_ids)
    with open('../UNQUERIED_SUMMONERS', 'w') as unqueried_output:
        unqueried_output.writelines('%s\n' % sid for sid in result_ids)
def main():
    """Incrementally query summoners that have not been queried yet.

    Reads the QUERIED/UNQUERIED id files, queries each new id, appends
    its stats to ../stats.json, marks it queried, and finally rewrites
    ../UNQUERIED_SUMMONERS with the newly discovered, still-unqueried ids.
    """
    data = DataAggregator.create()
    with open('../QUERIED_SUMMONERS', 'r') as queried_summoners_input:
        queried_summoners = queried_summoners_input.readlines()
    with open('../UNQUERIED_SUMMONERS', 'r') as unqueried_summoners_input:
        unqueried_summoners = unqueried_summoners_input.readlines()
    # List comprehensions instead of map(): on Python 3 map() is lazy and
    # the .append() below would fail on a map object.
    queried_summoners = [line.strip() for line in queried_summoners]
    unqueried_summoners = [line.strip() for line in unqueried_summoners]
    new_aggregated_summoners = []
    for summoner_id in unqueried_summoners:
        if summoner_id in queried_summoners:
            # Parenthesized print keeps this runnable on Python 2 and 3.
            print('ID %s has already been queried' % summoner_id)
            continue
        stats, aggregated_summoners = data.get_build_data([summoner_id])
        with open('../stats.json', 'a') as stats_output:
            for stat in stats:
                stats_output.write('%s\n' % json.dumps(stat))
        with open('../QUERIED_SUMMONERS', 'a') as queried_summoners_output:
            queried_summoners_output.write('%s\n' % summoner_id)
        queried_summoners.append(summoner_id)
        new_aggregated_summoners += aggregated_summoners
        # Rate-limit consecutive API queries.
        time.sleep(QUERY_DELAY)
    new_unqueried_summoners = [
        summoner for summoner in new_aggregated_summoners
        if summoner not in queried_summoners
    ]
    with open('../UNQUERIED_SUMMONERS', 'w') as unqueried_summoners_output:
        for summoner_id in new_unqueried_summoners:
            unqueried_summoners_output.write('%s\n' % summoner_id)
def top_mentions(self, X):
    """Return the X most-mentioned screen names across ``self.tweets``.

    Collects every entry of each tweet's ``entities['user_mentions']``
    and returns ``Counter.most_common(X)``: (screen_name, count) pairs,
    most frequent first.
    """
    mentions = []
    for tweet in self.tweets:
        # extend() with an empty list is a no-op, so no length guard needed.
        mentions.extend(
            m.get('screen_name')
            for m in tweet.entities.get("user_mentions")
        )
    return Counter(mentions).most_common(X)


if __name__ == '__main__':
    data_helper = DataAggregator()
    date_range = [date.today().strftime('%Y-%m-%d')]  # Only today.
    df = data_helper.get_data(date_range=date_range)
    tweet_stats = TwitterStatistics(df)
    tdf = tweet_stats.get_data()
    try:
        print(tdf.to_string())
    except UnicodeEncodeError:
        # The console encoding cannot represent some characters; write the
        # UTF-8 bytes directly. (Fixed: was a bare `except:` and wrote the
        # wrong frame — `df` instead of `tdf`.)
        sys.stdout.buffer.write(tdf.to_string().encode('utf-8'))
def setUp(self):
    """Create the mock template and a DataAggregator bound to its name."""
    template_name = "mock_b_template"
    self.mock_template = MockBTemplate(template_name)
    self.da = DataAggregator(template_name)
class SummarizeNER(object):
    """Extract named-entity phrases from a text dataframe and fetch a
    short Wikipedia summary for each phrase."""

    def __init__(self, df):
        # df must provide a 'text' column.
        self.data = df
        self.cleaned_data = self.get_cleaned_data()
        self.cleaned_phrases = self.get_ner_tags()

    def get_cleaned_data(self):
        """Return the cleaned version of every text in the dataframe."""
        return [self.clean(text) for text in self.data['text']]

    def get_summarized_data(self):
        """Return a DataFrame pairing each NER phrase with its summary."""
        wikidf = pd.DataFrame(columns=("NER", "Summary"))
        wikidf["NER"] = self.cleaned_phrases
        wikidf["Summary"] = self.get_wiki_summary()
        return wikidf

    def del_repeat(self, seq):
        """Return *seq* with duplicates removed, preserving first-seen order."""
        seen = set()
        seen_add = seen.add
        return [x for x in seq if not (x in seen or seen_add(x))]

    def get_wiki_summary(self, sentences=4):
        """Download a Wikipedia summary for every NER phrase.

        On failure, retries with the first suggestion found in the error
        message (disambiguation-style), then falls back to a placeholder.
        """
        wiki_summary = []
        total = len(self.cleaned_phrases)
        for i, phrase in enumerate(self.cleaned_phrases):
            print("Downloading ({}/{}) wikipedia page...".format(i + 1, total),
                  end="\r")
            try:
                summary = wikipedia.summary(phrase, sentences=sentences)
            except Exception as e:
                try:
                    # Second line of the error text carries a suggestion.
                    suggestion = str(e).splitlines()[1]
                    summary = wikipedia.summary(suggestion, sentences=sentences)
                except Exception:
                    summary = "No wikipedia page found"
            wiki_summary.append(summary)
        return wiki_summary

    def clean(self, text, url=True, words_only=True, first_n_sent=(False, 4)):
        """Strip URLs and/or non-letters; optionally keep the first N
        sentences.

        first_n_sent is an (enabled, n) pair; n was previously hard-coded
        to 4 regardless of the value passed.
        """
        if url:
            text = re.sub(r'\w+:\/{2}[\d\w-]+(\.[\d\w-]+)*(?:(?:\/[^\s/]*))*',
                          '', text)
        if words_only:
            regex = re.compile('[^a-zA-Z]')
            text = regex.sub(' ', text)
        if first_n_sent[0]:
            # Use the requested sentence count instead of a literal 4.
            text = re.match(r'(?:[^.:;]+[.:;]){%d}' % first_n_sent[1],
                            text).group()
        return text

    def get_ner_tags(self):
        """Tag all cleaned texts with the Stanford NER tagger and return
        the de-duplicated list of multi-word entity phrases."""
        sys.path.append('../preprocess')
        from nltk.tag.stanford import StanfordNERTagger
        st = StanfordNERTagger(
            '../stanford-ner/classifiers/english.all.3class.distsim.crf.ser.gz',
            '../stanford-ner/stanford-ner.jar')
        tokenized_list = [ct.split() for ct in self.cleaned_data]
        NERTags = st.tag_sents(tokenized_list)
        n = []
        for nt in NERTags:
            n.extend(nt)
        # Indexes of all words that carry a (non-'O') NER tag.
        ids = [i for a, i in zip(n, range(len(n))) if a[1] != "O"]
        a = np.array(ids)
        # Runs of consecutive indexes form multi-word entity phrases.
        consecutive_ids = np.split(a, np.where(np.diff(a) != 1)[0] + 1)
        phrases = []
        for ci in consecutive_ids:
            phrase = ""
            tag = ""
            for id_ in ci:
                phrase += "{} ".format(n[id_][0])
                tag += "{}".format(n[id_][1])
            phrases.append(phrase)
        return self.del_repeat(phrases)


if __name__ == '__main__':
    data_helper = DataAggregator()
    date_range = [date.today().strftime('%Y-%m-%d')]  # Only today.
    df = data_helper.get_data(date_range=date_range)
    sn = SummarizeNER(df)
    sd = sn.get_summarized_data()
    # Fixed: was `sd.endode("UTF-8")`, an AttributeError typo.
    print(sd.to_string())
class SummarizeNER(object):
    """Tag each text with one NER phrase (or "N/A") and attach Wikipedia
    summaries as new columns on the input dataframe."""

    def __init__(self, df):
        self.data = df
        self.cleaned_data = self.get_cleaned_data()
        # One entry per text: a (phrase, tag) tuple, or "N/A" if no entity.
        self.cleaned_phrases = self.get_ner_tags()

    def get_cleaned_data(self):
        """Return the cleaned version of every text in the dataframe."""
        return [self.clean(text) for text in self.data['text']]

    def get_summarized_data(self):
        """Attach NER phrases and Wikipedia summaries to the dataframe.

        NOTE(review): the column name 'Wiki-NER-Sumarry' is misspelled but
        kept as-is because downstream consumers may rely on it.
        """
        self.data['NER'] = self.cleaned_phrases
        self.data['Wiki-NER-Sumarry'] = self.get_wiki_summary()
        return self.data

    def del_repeat(self, seq):
        """Return *seq* with duplicates removed, preserving first-seen order."""
        seen = set()
        seen_add = seen.add
        return [x for x in seq if not (x in seen or seen_add(x))]

    def get_wiki_summary(self, sentences=4):
        """Download a Wikipedia summary for each (phrase, tag) entry.

        "N/A" entries and lookup failures both yield a placeholder string,
        so the returned list stays aligned with ``self.cleaned_phrases``.
        """
        wiki_summary = []
        for phrase in self.cleaned_phrases:
            if phrase != 'N/A':
                print("Downloading wikipedia pages...", end="\r")
                try:
                    # phrase is a (text, tag) tuple; look up the text part.
                    summary = wikipedia.summary(phrase[0], sentences=sentences)
                except Exception as e:
                    try:
                        # Second line of the error text carries a suggestion.
                        suggestion = str(e).splitlines()[1]
                        summary = wikipedia.summary(suggestion,
                                                    sentences=sentences)
                    except Exception:
                        summary = "No wikipedia page found"
            else:
                summary = "No wikipedia page found"
            wiki_summary.append(summary)
        return wiki_summary

    def clean(self, text, url=True, words_only=True, first_n_sent=(False, 4)):
        """Strip URLs and/or non-letters; optionally keep the first N
        sentences.

        first_n_sent is an (enabled, n) pair; n was previously hard-coded
        to 4 regardless of the value passed.
        """
        if url:
            text = re.sub(r'\w+:\/{2}[\d\w-]+(\.[\d\w-]+)*(?:(?:\/[^\s/]*))*',
                          '', text)
        if words_only:
            regex = re.compile('[^a-zA-Z]')
            text = regex.sub(' ', text)
        if first_n_sent[0]:
            # Use the requested sentence count instead of a literal 4.
            text = re.match(r'(?:[^.:;]+[.:;]){%d}' % first_n_sent[1],
                            text).group()
        return text

    def get_ner_tags(self):
        """Tag each cleaned text; return one (phrase, tag) tuple per text,
        or "N/A" when the text contains no named entity."""
        sys.path.append('../preprocess')
        from nltk.tag.stanford import StanfordNERTagger
        st = StanfordNERTagger(
            '../stanford-ner/classifiers/english.all.3class.distsim.crf.ser.gz',
            '../stanford-ner/stanford-ner.jar')
        tokenized_list = [ct.split() for ct in self.cleaned_data]
        tags = [nt for nt in st.tag_sents(tokenized_list)]
        # Per text: indexes of tokens carrying a (non-'O') NER tag.
        ids = [[i for a, i in zip(t, range(len(t))) if a[1] != "O"]
               for t in tags]
        phrases = []
        for token_ids, t in zip(ids, tags):
            phrase = ""
            tt = "N/A"
            for index, p in enumerate(token_ids):
                if index == len(token_ids) - 1:
                    phrase += "{}".format(t[p][0])
                    # Pair the assembled phrase with the last token's tag.
                    tt = phrase, t[p][1]
                else:
                    phrase += "{} ".format(t[p][0])
            phrases.append(tt)
        return phrases


if __name__ == '__main__':
    data_helper = DataAggregator()
    date_range = [date.today().strftime('%Y-%m-%d')]  # Only today.
    df = data_helper.get_data(date_range=date_range)
    sn = SummarizeNER(df)
    sd = sn.get_summarized_data()
    # Fixed: was `sd.endode("UTF-8")`, an AttributeError typo.
    print(sd.to_string())
def init(self):
    """Create the data aggregator and the carousel that displays it."""
    aggregator = DataAggregator()
    self.__aData = aggregator
    self.__carousel = Carousel(aggregator)
def init(self):
    """Build the aggregator/carousel and aim the hangar camera at preview."""
    self.__aData = DataAggregator()
    self.__carousel = Carousel(self.__aData)
    space = g_hangarSpace.space
    # Remember the current camera location so it can be restored later.
    self.__hangarCameraLocation = space.getCameraLocation()
    space.locateCameraToPreview()
def get_aggregate_data(q_out):
    """Retrieve everything from *q_out* and parse it with a DataAggregator."""
    return DataAggregator().parse_data(retrieve_data(q_out))
class GoogleDataEnhancer(object):
    """Enhance twitter/reddit texts with Google search results classified
    against the domain whitelist in ../domains.json."""

    def __init__(self, df):
        self.data = self.get_data(df)
        self.domains = self.get_domains()
        self.results = self.google_search()

    def get_data(self, df):
        """Collect cleaned tweet texts and raw reddit titles from *df*."""
        twitter_idx = df[df['source'] == "twitter"].index.tolist()
        tweets = [self.clean(df["raw_data"][i].text) for i in twitter_idx]
        reddit_idx = df[df['source'] == "reddit"].index.tolist()
        subs = [df["raw_data"][i].title for i in reddit_idx]
        return tweets + subs

    def get_domains(self):
        """Load the category -> [domain substrings] whitelist."""
        with open("../domains.json", "r") as f:
            return json.load(f)

    def clean(self, text):
        """Strip URLs, then replace every non-letter with a space."""
        URLless_text = re.sub(
            r'\w+:\/{2}[\d\w-]+(\.[\d\w-]+)*(?:(?:\/[^\s/]*))*', '', text)
        regex = re.compile('[^a-zA-Z]')
        return regex.sub(' ', URLless_text)

    def in_domain(self, url):
        """Return the category whose domain list matches *url*, or ''."""
        for category in self.domains:
            for candidate in self.domains[category]:
                if candidate in url:
                    return category
        return ""

    def google_search(self):
        """Google each data item; always returns one DataFrame per item.

        Fixed: the original `continue` in the except branch skipped the
        append, so ``results`` ended up shorter than ``data`` whenever a
        query failed, breaking enhance()'s column assignment. Failed
        queries now contribute an empty frame with the columns enhance()
        reads.
        """
        results = []
        gd = GoogleDataHelper()
        print("* Google Searching Data...")
        for i, d in enumerate(self.data):
            try:
                d = self.clean(d)
                print("* * Downloading ({}/{}) query".format(
                    i + 1, len(self.data)))
                r = gd.get_data(querystring=d)
            except Exception as e:
                print("* * cannot download query ({}) because: ({})".format(
                    i, str(e)))
                r = pd.DataFrame(columns=['author', 'text'])
            results.append(r)
            sleep(5)  # minimum time to not look like a bot/script
        print("* Download complete! ")
        return results

    def enhance(self):
        """Return a DataFrame mapping each item to its (url, text) result
        pairs, grouped into one column per domain category."""
        df = pd.DataFrame(columns=(list(self.domains.keys())))
        df["data"] = self.data
        df['results'] = self.results
        for i, r in enumerate(self.results):
            for category in self.domains:
                df[category][i] = []
            for url, text in zip(r['author'], r['text']):
                category = self.in_domain(url)
                if category != "":
                    df[category][i].append((url, text))
        return df


if __name__ == '__main__':
    data_helper = DataAggregator()
    date_range = [date.today().strftime('%Y-%m-%d')]  # Only today.
    df = data_helper.get_data(date_range=date_range)
    gde = GoogleDataEnhancer(df)
    print(gde.enhance())