def _build_bohr(self):
    self.bohr = TaskBohr()
    reader = Reader(self.file)
    words = deque()
    # Slide a window of up to MAX_KEY_SIZE (index, word) pairs over the stream;
    # every position starts exactly one key.
    for index, word in enumerate(reader.read_words()):
        words.append((index, word))
        if len(words) >= self.MAX_KEY_SIZE:
            self._add_word(words)
            words.popleft()
    # Flush the remaining, shorter windows at the end of the stream.
    while len(words) > 0:
        self._add_word(words)
        words.popleft()
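# A minimal standalone sketch of the sliding-window behaviour above, assuming
# MAX_KEY_SIZE = 3 and a hypothetical _add_word that just records the current
# window: every word position starts exactly one key of up to MAX_KEY_SIZE words.
from collections import deque

MAX_KEY_SIZE = 3
windows = []

def _add_word(words):
    windows.append(list(words))

words = deque()
for index, word in enumerate("a b c d".split()):
    words.append((index, word))
    if len(words) >= MAX_KEY_SIZE:
        _add_word(words)
        words.popleft()
while len(words) > 0:
    _add_word(words)
    words.popleft()

# windows == [[(0, 'a'), (1, 'b'), (2, 'c')],
#             [(1, 'b'), (2, 'c'), (3, 'd')],
#             [(2, 'c'), (3, 'd')],
#             [(3, 'd')]]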
class TestReader(unittest.TestCase):
    reader = Reader('unit-test', Queue(), Queue())

    def test_parse_log_line(self):
        fixture_line = '199.72.81.55 - - [01/Jul/1995:00:00:01 -0400] "GET /history/apollo/ HTTP/1.0" 200 6245'
        formatted_line = {
            'remote_host': '199.72.81.55',
            'user_identity': '-',
            'user_name': '-',
            'datetime': datetime(1995, 7, 1, 4, 0, 1),
            'request': 'GET /history/apollo/ HTTP/1.0',
            'status_code': 200,
            'response_size': 6245,
            'section': '/history'
        }
        self.assertEqual(formatted_line, self.reader.parse_log_line(fixture_line))

        fixture_line = '199.72.81.55 - jeremy [01/Jul/1995:00:01:43 +0700] "GET / HTTP/1.0" 200 7074'
        formatted_line = {
            'remote_host': '199.72.81.55',
            'user_identity': '-',
            'user_name': 'jeremy',
            'datetime': datetime(1995, 6, 30, 17, 1, 43),
            'request': 'GET / HTTP/1.0',
            'status_code': 200,
            'response_size': 7074,
            'section': '/'
        }
        self.assertEqual(formatted_line, self.reader.parse_log_line(fixture_line))

        fixture_line = '199.72.81.55 [01/Jul/1995:00:01:43 +0700] "GET / HTTP/1.0" 200'
        self.assertRaises(LineFormatError, lambda: self.reader.parse_log_line(fixture_line))

    def test_get_section(self):
        self.assertEqual('/history', self.reader.get_section('GET /history/apollo/ HTTP/1.0'))
        self.assertEqual('/major-history', self.reader.get_section('GET /major-history/apollo/ HTTP/1.0'))
        self.assertEqual('/minor.history', self.reader.get_section('GET /minor.history/apollo/ HTTP/1.0'))
        self.assertEqual('/', self.reader.get_section('GET /history.php HTTP/1.0'))
        self.assertEqual('/', self.reader.get_section('GET / HTTP/1.0'))
        self.assertRaises(LineFormatError, lambda: self.reader.get_section('test test'))

    def test_parse_datetime(self):
        self.assertEqual(datetime(2006, 12, 7, 18, 23, 54),
                         self.reader.parse_datetime('07/Dec/2006:14:23:54 -0400'))
        self.assertRaises(IndexError, lambda: self.reader.parse_datetime('07/Dec/2006:14:23:54'))
        self.assertRaises(ValueError, lambda: self.reader.parse_datetime('Test test'))
def nyan_filter(self, status):
    token = Token()
    reader = Reader()
    api = token.get_key(reader.json_dir())
    print(status.text)
    text = status.text
    # for nyan in nyan_list:
    for nyan in open('./dictionary.txt', 'r'):
        nyan = nyan.replace('\n', '')
        print(nyan)
        if nyan in text:
            print("OUT!! Delete Tweet!! Nyan Nyan Filter Start Up!!")
            # Delete only the most recent tweet on the timeline.
            for tweet in tweepy.Cursor(api.user_timeline).items():
                api.destroy_status(tweet.id)
                break
            # Post "Nyan nyan filter activated!!" plus a timestamp.
            api.update_status("にゃんにゃんフィルター発動!!\n" + datetime.now().strftime("%Y/%m/%d %H:%M:%S"))
        else:
            print("No problem!!")
def __init__(self):
    self.read = Reader()
    # self.news_list = ["Today's policy is about global warming", "Donald Trupm is the president of United States", "UCLA is the best school in southern California", "Noor Nakhaei is going to be student at UCLA", "the Boelter Hall is a dungeon", "UCLA is colaborating with Stanford", "Wenhao is meeting Trump", "Trump is in United Kingdom"]
    self.news_list = self.read.read_csv_file("./data/mixed-news/articles-title_only.csv")
    self.graph = Graph(self.news_list)
    self.words = self.graph.get_words()
    self.entities = self.graph.get_entities()
    self.ee_graph = EE(self.news_list)
    self.ec_graph = EC(self.news_list)
    self.cc_graph = CC(self.news_list)
    print("cc", self.cc_graph.get_edges())
    self.kg_graph = KG(self.news_list)
    self.d = 10  # THIS SHOULD BE CHANGED! 4, 10, 18
    self.S = pd.DataFrame(1, index=self.entities, columns=range(0, self.d))
    self.T = pd.DataFrame(1, index=self.words, columns=range(0, self.d))
    # Fill both embedding tables with random integers in [0, 10].
    for i in self.S.columns:
        for j in self.S.index:
            self.S[i][j] = randint(0, 10)
    for i in self.T.columns:
        for j in self.T.index:
            self.T[i][j] = randint(0, 10)
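# Design note: the nested loops above fill S and T one cell at a time, which is
# slow for large tables. A vectorized sketch of the same initialization, assuming
# numpy is available as np (np.random.randint's high bound is exclusive, hence 11
# to keep the same inclusive 0..10 range as random.randint):
import numpy as np
import pandas as pd

entities, words, d = ['e1', 'e2'], ['w1', 'w2', 'w3'], 10
S = pd.DataFrame(np.random.randint(0, 11, size=(len(entities), d)), index=entities)
T = pd.DataFrame(np.random.randint(0, 11, size=(len(words), d)), index=words)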
def parse_fun(json_text):
    return parse(Reader(json_text))
indexer.createIndex()

if args.query is not None:
    # if not os.path.isfile(idxfile):
    #     raise Exception("Could not find indexfile: {}".format(idxfile))
    if args.analyzer is None or args.language == 'all':
        raise ValueError(
            "To retrieve a query you must specify analyzer and language")
    searcher = Searcher(index_path=args.index,
                        lang=args.language,
                        analyzer=args.analyzer,
                        dataset=args.dataset)
    searcher.queryTest(args.query)

if args.run == 'reader':
    reader = Reader()
    reader.run(lang=args.lang, analyzer=args.analyzer, dataset=args.dataset)

if args.metric == 'dist':
    metrics.hits(dataset=args.dataset,
                 langContext=args.language,
                 langQuestion=args.language,
                 distant=True,
                 k=50)

if args.metric == 'hit@k':
    metrics.hits(dataset=args.dataset,
                 langContext=args.language,
                 langQuestion=args.language,
                 distant=False,
                 k=50)  # call truncated in the source; k=50 assumed to mirror the 'dist' branch
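# Hypothetical argparse setup consistent with the branches above; the flag names
# are inferred from the attributes used and the defaults are assumptions. Note
# that the reader branch reads args.lang while the other branches read
# args.language, so both flags are declared here.
import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--query', default=None)
parser.add_argument('--analyzer', default=None)
parser.add_argument('--language', default='all')
parser.add_argument('--lang', default=None)
parser.add_argument('--index', default=None)
parser.add_argument('--dataset', default=None)
parser.add_argument('--run', default=None)
parser.add_argument('--metric', default=None)
args = parser.parse_args()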
class GUI(QtWidgets.QWidget):
    __reader = Reader()
    __viewer = Viewer()
    __glWidget = glWidget()
    __slider = QtWidgets.QSlider(QtCore.Qt.Horizontal)
    # Draw-mode labels (Russian UI strings): "Texturing",
    # "Quads, 2*n + 2 vertices" (quad strip), "Quads, 4*n vertices".
    __draws = {
        "Текстурирование": __viewer.paint_texture,
        "Прямоугольники 2*n + 2 вершин": __viewer.paint_quadstrip,
        "Прямоугольники 4*n вершин": __viewer.paint_quads
    }
    __curr_draw = "Текстурирование"

    def __init__(self, parent=None):
        super().__init__(parent)
        self.__grid = QtWidgets.QGridLayout(self)
        self.__setup_main_widget()

    def __setup_main_widget(self):
        self.__glWidget.render.connect(self.__viewer.paint_texture)
        self.__grid.addWidget(self.__glWidget, 0, 0, 10, 2)
        self.__slider.sliderReleased.connect(self.__connect_value_changed)
        self.__grid.addWidget(self.__slider, 11, 0)
        self.__curr_slider = QtWidgets.QLabel()
        self.__curr_slider.setText("0")
        self.__grid.addWidget(self.__curr_slider, 11, 1)
        button = QtWidgets.QPushButton()
        button.setText("Open Tomogram")
        button.clicked.connect(self.__connect_open_tomogram)
        self.__grid.addWidget(button, 12, 0, 1, 2)
        draw_list = QtWidgets.QComboBox()
        draw_list.addItems(self.__draws.keys())
        draw_list.activated[str].connect(self.__connect_change_draw)
        self.__grid.addWidget(draw_list, 13, 0, 1, 2)
        self.__min_input = QtWidgets.QLineEdit()
        self.__min_input.setText("0")
        self.__grid.addWidget(self.__min_input, 14, 0)
        self.__length_input = QtWidgets.QLineEdit()
        self.__length_input.setText("2000")
        self.__grid.addWidget(self.__length_input, 14, 1)
        button = QtWidgets.QPushButton()
        button.setText("Set transfer parameters")
        button.clicked.connect(self.__connect_transfer_parameters)
        self.__grid.addWidget(button, 15, 0, 1, 2)
        button = QtWidgets.QPushButton()
        button.setText("Start render")
        button.clicked.connect(self.__start_render)
        self.__grid.addWidget(button, 16, 0, 1, 2)

    def __connect_change_draw(self, draw_name: str):
        # Swap the render signal from the old draw handler to the new one.
        self.__glWidget.render.disconnect(self.__draws[self.__curr_draw])
        self.__curr_draw = draw_name
        self.__glWidget.render.connect(self.__draws[self.__curr_draw])

    def __connect_value_changed(self):
        value = self.__slider.value()
        self.__curr_slider.setText(str(value))
        self.__viewer.set_layer(value)
        self.__start_render()

    def __connect_transfer_parameters(self):
        min_value, length = int(self.__min_input.text()), int(
            self.__length_input.text())
        self.__viewer.set_transfer_parameters(min_value, length)

    def __connect_open_tomogram(self):
        tomogram_path = QtWidgets.QFileDialog.getOpenFileName(
            self, "Open Tomogram", ".")[0]
        if not tomogram_path:
            return
        shape, tomogram = self.__reader.Read(tomogram_path)
        self.__slider.setRange(0, shape[2] - 1)
        self.__slider.setValue(0)
        self.__curr_slider.setText("0")
        w, h = self.__glWidget.size().width(), self.__glWidget.size().height()
        min_value, length = int(self.__min_input.text()), int(
            self.__length_input.text())
        self.__viewer.set_tomogram(shape, tomogram)
        self.__viewer.set_transfer_parameters(min_value, length)
        self.__viewer.setup_view(w, h)

    def __start_render(self):
        self.__glWidget.update()
def setUp(self):
    self.reader = Reader()
    self.tokenized_text = word_tokenize(TEXT)
    self.classified_text = self.reader.st.tag(self.tokenized_text)
def runf1(conn, args):
    # evaluation dataset: English context so that the answer is in English
    data = MLQADataset(args.dataset, 'en', args.langQuestion)
    # initialize searcher
    init(conn, 'wiki', args)
    # initialise reader
    print("Reader")
    reader = Reader(model="models/distilbert-base-uncased-distilled-squad/",
                    tokenizer="models/distilbert-uncased-my-tok")
    # initialise translator
    print("Translator")
    languages = {args.langQuestion, args.langSearch, 'en'}
    translator = Translator(languages)
    print("Translating between: {}".format(str(languages)))
    counters = {'f1': [], 'tally': 0, 'score': []}
    for doc in data.get():
        questionSearch = translator(doc['question'], args.langQuestion, args.langSearch)
        #print("questionSearch ", questionSearch.encode('utf-8'))
        search(conn, questionSearch, args.langSearch)
        if args.langSearch == 'en':
            questionRead = questionSearch
        else:
            questionRead = translator(doc['question'], args.langQuestion, 'en')
        #print("questionRead ", questionRead.encode('utf-8'))
        # recv = {'search': [{'id': qid, 'docs': [{'context': '...', 'title': '...', 'score': score}]}]}
        bestScore = 0
        recv = recvall(conn)
        for n, docSearch in enumerate(recv['search'][0]['docs']):
            # the reader answers the question given each retrieved context
            #print("n: ", n)
            #print("contextSearch ", docSearch['context'].encode('utf-8'))
            contextRead = translator(docSearch['context'], args.langSearch, 'en')
            #print("contextRead ", contextRead.encode('utf-8'))
            _, answerRead, score = reader(questionRead, contextRead)
            if score >= bestScore:
                bestScore = score
                bestAnswer = answerRead
                bestContext = contextRead
        #print("goldAnswer: ", doc['answer'].encode('utf-8'))
        #print("Answer: ", bestAnswer.encode('utf-8'))
        counters['f1'].append(f1_drqa(bestAnswer, doc['answer']))
        counters['tally'] += 1
        counters['score'].append(bestScore)
        # test
        if args.stop != 0 and counters['tally'] >= args.stop:
            print("Stopping at: ", counters['tally'])
            break
        #if i > 1:
        #    break
    f1 = np.array(counters['f1'])
    exact_match = f1[f1 == 1.0].sum() / f1.size
    print("Exact match: {}".format(exact_match))
    print("F1 mean: {}".format(f1.mean()))
    print("Mean score: {}".format(sum(counters['score']) / counters['tally']))
    print("Total: {}".format(counters['tally']))
    if args.save_as:
        print("Writing to: ", args.save_as)
        with open(args.save_as, "w") as fp:
            json.dump(counters, fp)
    close(conn, args.stop_server)
    return f1.mean()
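# f1_drqa is not defined in this fragment; below is a sketch of the standard
# DrQA/SQuAD token-overlap F1 it presumably computes (the real helper may
# normalize differently):
import re
import string
from collections import Counter

def f1_drqa(prediction, gold):
    def normalize(s):
        # lowercase, drop articles and punctuation, split into tokens
        s = re.sub(r'\b(a|an|the)\b', ' ', s.lower())
        s = ''.join(c for c in s if c not in string.punctuation)
        return s.split()

    pred_tokens, gold_tokens = normalize(prediction), normalize(gold)
    common = Counter(pred_tokens) & Counter(gold_tokens)
    overlap = sum(common.values())
    if overlap == 0:
        return 0.0
    precision = overlap / len(pred_tokens)
    recall = overlap / len(gold_tokens)
    return 2 * precision * recall / (precision + recall)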
def read(args):
    """reader function"""
    db_file = args.wiki_db_file
    reader_feature_file = args.reader_feature_file
    reader_example_file = args.reader_example_file
    encoder_ck_file = args.reader_encoder_ck_file
    downstream_ck_file = args.reader_downstream_ck_file
    albert_model_path = args.albert_model_path
    reader_result_file = args.reader_result_file
    seed = args.seed
    sp_threshold = args.sp_threshold
    seq_len = args.seq_len
    batch_size = args.reader_batch_size
    para_limit = args.max_para_num
    sent_limit = args.max_sent_num

    random.seed(seed)
    np.random.seed(seed)

    t1 = time()

    doc_db = DocDB(db_file)

    generator = DataGenerator(feature_file_path=reader_feature_file,
                              example_file_path=reader_example_file,
                              batch_size=batch_size, seq_len=seq_len,
                              para_limit=para_limit, sent_limit=sent_limit,
                              task_type="reader")
    example_dict = generator.example_dict
    feature_dict = generator.feature_dict
    answer_dict = defaultdict(lambda: defaultdict(list))
    new_answer_dict = {}
    total_sp_dict = defaultdict(list)
    new_total_sp_dict = defaultdict(list)

    tokenizer = AlbertTokenizer.from_pretrained(albert_model_path)
    new_tokens = ['[q]', '[/q]', '<t>', '</t>', '[s]']
    tokenizer.add_tokens(new_tokens)

    reader = Reader(batch_size=batch_size,
                    encoder_ck_file=encoder_ck_file,
                    downstream_ck_file=downstream_ck_file)

    print("start reading ...")

    for _, batch in tqdm(enumerate(generator)):
        input_ids = Tensor(batch["context_idxs"], mstype.int32)
        attn_mask = Tensor(batch["context_mask"], mstype.int32)
        token_type_ids = Tensor(batch["segment_idxs"], mstype.int32)
        context_mask = Tensor(batch["context_mask"], mstype.float32)
        square_mask = Tensor(batch["square_mask"], mstype.float32)
        packing_mask = Tensor(batch["query_mapping"], mstype.float32)
        para_start_mapping = Tensor(batch["para_start_mapping"], mstype.float32)
        sent_end_mapping = Tensor(batch["sent_end_mapping"], mstype.float32)
        unique_ids = batch["unique_ids"]
        sent_names = batch["sent_names"]
        cache_mask = Tensor(np.tril(np.triu(np.ones((seq_len, seq_len)), 0), 30),
                            mstype.float32)

        _, _, q_type, _, sent_logit, y1, y2 = reader(input_ids, attn_mask,
                                                     token_type_ids,
                                                     context_mask, square_mask,
                                                     packing_mask, cache_mask,
                                                     para_start_mapping,
                                                     sent_end_mapping)

        type_prob = ops.Softmax()(q_type).asnumpy()
        answer_dict_ = convert_to_tokens(example_dict,
                                         feature_dict,
                                         batch['ids'],
                                         y1.asnumpy().tolist(),
                                         y2.asnumpy().tolist(),
                                         type_prob,
                                         tokenizer,
                                         sent_logit.asnumpy(),
                                         sent_names,
                                         unique_ids)
        for q_id in answer_dict_:
            answer_dict[q_id] = answer_dict_[q_id]

    for q_id in answer_dict:
        res = answer_dict[q_id]
        answer_text_ = res[0]
        sent_ = res[1]
        sent_names_ = res[2]
        new_answer_dict[q_id] = answer_text_
        predict_support_np = ops.Sigmoid()(Tensor(sent_, mstype.float32)).asnumpy()
        for j in range(predict_support_np.shape[0]):
            if j >= len(sent_names_):
                break
            if predict_support_np[j] > sp_threshold:
                total_sp_dict[q_id].append(sent_names_[j])

    for _id in total_sp_dict:
        _sent_names = total_sp_dict[_id]
        for para in _sent_names:
            title = make_wiki_id(para[0], 0)
            para_original_title = doc_db.get_doc_info(title)[-1]
            para[0] = para_original_title
            new_total_sp_dict[_id].append(para)

    prediction = {'answer': new_answer_dict,
                  'sp': new_total_sp_dict}

    with open(reader_result_file, 'w') as f:
        json.dump(prediction, f, indent=4)

    t2 = time()
    print(f"reader cost time: {t2-t1} s")
def json_decode(json_text):
    return parse(Reader(json_text))
import os

from src.log_simulator import LogSimulator
from time import time
from queue import Queue

# Reader, Displayer and AlertSystem are assumed to be imported from sibling
# src modules not shown in this fragment.

DIR_NAME = os.path.dirname(os.path.abspath(__file__))

if __name__ == '__main__':
    read_line_queue = Queue()
    traffic_queue = Queue()
    alert_content = {
        'type': AlertSystem.ALERT_RECOVER_TYPE,
        'to_display': False
    }
    reader = Reader(DIR_NAME + '/data/access-log.log', read_line_queue, traffic_queue)
    displayer = Displayer(read_line_queue, alert_content, 10, True)
    alert_system = AlertSystem(80, traffic_queue, alert_content, 120)
    log_simulator = LogSimulator(DIR_NAME + '/data/access-log.log', 'localhost',
                                 ['/', '/section1'])

    current_time = time()
    log_simulator.start()
    reader.start()
    displayer.start()
    alert_system.start()
    while time() - current_time <= 120:
        log_simulator.resume()
        reader.resume()
def __init__(self, f, start, goal):
    self.reader = Reader(f)
    # Parse "x,y" coordinate strings into integer tuples.
    self.start = tuple(map(int, start.split(',')))
    self.goal = tuple(map(int, goal.split(',')))
    self.expanded = []
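# Usage sketch: start and goal arrive as "x,y" strings (e.g. from the command
# line) and parse into integer tuples:
start = tuple(map(int, '0,0'.split(',')))   # (0, 0)
goal = tuple(map(int, '4,7'.split(',')))    # (4, 7)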
if __name__ == '__main__':
    try:
        config = ConfigLoader(DIR_NAME + '/config.ini')
        parameters = config.configure_threads()
        read_line_queue = Queue()
        traffic_queue = Queue()
        alert_content = {
            'type': AlertSystem.ALERT_RECOVER_TYPE,
            'to_display': False
        }
        reader = Reader(input_queue=read_line_queue,
                        input_traffic_queue=traffic_queue,
                        **parameters['reader'])
        displayer = Displayer(output_queue=read_line_queue,
                              alert_content=alert_content,
                              **parameters['displayer'])
        alert_system = AlertSystem(output_traffic_queue=traffic_queue,
                                   alert_content=alert_content,
                                   **parameters['alert_system'])
        has_simulator = False
        log_simulator = None
        if 'log_simulator' in parameters and parameters['log_simulator'] is not None:
            log_simulator = LogSimulator(**parameters['log_simulator'])
            has_simulator = True