Example #1
    def _build_bohr(self):
        self.bohr = TaskBohr()
        reader = Reader(self.file)

        # Sliding window of (index, word) pairs, at most MAX_KEY_SIZE long.
        words = deque()

        for index, word in enumerate(reader.read_words()):
            words.append((index, word))
            if len(words) >= self.MAX_KEY_SIZE:
                self._add_word(words)
                words.popleft()

        # Flush the tail: each remaining suffix becomes a shorter window.
        while words:
            self._add_word(words)
            words.popleft()
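
Stripped of the project's TaskBohr and Reader classes, the same sliding-window pattern looks like this; a self-contained sketch, with a plain word list standing in for reader.read_words() and a list append standing in for _add_word:

from collections import deque

MAX_KEY_SIZE = 3
window = deque()
windows = []

for index, word in enumerate("the quick brown fox jumps".split()):
    window.append((index, word))
    if len(window) >= MAX_KEY_SIZE:
        windows.append(list(window))  # stand-in for self._add_word(window)
        window.popleft()

# Drain the tail just as the method above does.
while window:
    windows.append(list(window))
    window.popleft()

print(windows)  # three 3-word windows, then the trailing 2- and 1-word suffixes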
Example #2
import unittest
from datetime import datetime
from queue import Queue

# Reader and LineFormatError are the classes under test; their imports
# are omitted in this excerpt.


class TestReader(unittest.TestCase):

    # A single Reader instance is shared by all test methods.
    reader = Reader('unit-test', Queue(), Queue())

    def test_parse_log_line(self):
        fixture_line = '199.72.81.55 - - [01/Jul/1995:00:00:01 -0400] "GET /history/apollo/ HTTP/1.0" 200 6245'
        formatted_line = {
            'remote_host': '199.72.81.55',
            'user_identity': '-',
            'user_name': '-',
            'datetime': datetime(1995, 7, 1, 4, 0, 1),
            'request': 'GET /history/apollo/ HTTP/1.0',
            'status_code': 200,
            'response_size': 6245,
            'section': '/history'
        }
        self.assertEqual(formatted_line, self.reader.parse_log_line(fixture_line))

        fixture_line = '199.72.81.55 - jeremy [01/Jul/1995:00:01:43 +0700] "GET / HTTP/1.0" 200 7074'
        formatted_line = {
            'remote_host': '199.72.81.55',
            'user_identity': '-',
            'user_name': 'jeremy',
            'datetime': datetime(1995, 6, 30, 17, 1, 43),
            'request': 'GET / HTTP/1.0',
            'status_code': 200,
            'response_size': 7074,
            'section': '/'
        }
        self.assertEqual(formatted_line, self.reader.parse_log_line(fixture_line))

        fixture_line = '199.72.81.55 [01/Jul/1995:00:01:43 +0700] "GET / HTTP/1.0" 200'
        self.assertRaises(LineFormatError, lambda: self.reader.parse_log_line(fixture_line))

    def test_get_section(self):
        self.assertEqual('/history', self.reader.get_section('GET /history/apollo/ HTTP/1.0'))
        self.assertEqual('/major-history', self.reader.get_section('GET /major-history/apollo/ HTTP/1.0'))
        self.assertEqual('/minor.history', self.reader.get_section('GET /minor.history/apollo/ HTTP/1.0'))
        self.assertEqual('/', self.reader.get_section('GET /history.php HTTP/1.0'))
        self.assertEqual('/', self.reader.get_section('GET / HTTP/1.0'))
        self.assertRaises(LineFormatError, lambda: self.reader.get_section('test test'))

    def test_parse_datetime(self):
        self.assertEqual(datetime(2006, 12, 7, 18, 23, 54), self.reader.parse_datetime('07/Dec/2006:14:23:54 -0400'))
        self.assertRaises(IndexError, lambda: self.reader.parse_datetime('07/Dec/2006:14:23:54'))
        self.assertRaises(ValueError, lambda: self.reader.parse_datetime('Test test'))
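
For context, a minimal sketch of how a parser could satisfy the expectations above, assuming Common Log Format input and datetimes normalised to naive UTC; the project's actual Reader may be implemented differently:

import re
from datetime import datetime, timezone

# Hypothetical regex matching the fixture lines used in the tests.
CLF_RE = re.compile(
    r'^(?P<remote_host>\S+) (?P<user_identity>\S+) (?P<user_name>\S+) '
    r'\[(?P<datetime>[^\]]+)\] "(?P<request>[^"]*)" '
    r'(?P<status_code>\d{3}) (?P<response_size>\d+)$')

def parse_clf_datetime(raw):
    # '01/Jul/1995:00:00:01 -0400' -> naive UTC datetime(1995, 7, 1, 4, 0, 1)
    parsed = datetime.strptime(raw, '%d/%b/%Y:%H:%M:%S %z')
    return parsed.astimezone(timezone.utc).replace(tzinfo=None)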
Example #3
    def nyan_filter(self, status):
        token = Token()
        reader = Reader()
        api = token.get_key(reader.json_dir())

        print(status.text)
        text = status.text

        # for nyan in nyan_list:
        with open('./dictionary.txt', 'r') as dictionary:
            for nyan in dictionary:
                nyan = nyan.replace('\n', '')
                print(nyan)
                if nyan in text:
                    print("OUT!! Delete Tweet!! Nyan Nyan Filter Start Up!!")
                    # Delete only the newest tweet on the timeline.
                    for tweet in tweepy.Cursor(api.user_timeline).items():
                        api.destroy_status(tweet.id)
                        break
                    # "Nyan-nyan filter activated!!" plus a timestamp.
                    api.update_status("にゃんにゃんフィルター発動!!\n" + datetime.now().strftime("%Y/%m/%d %H:%M:%S"))
                else:
                    print("No problem!!")
Example #4
    def __init__(self):
        self.read = Reader()
        # self.news_list = ["Today's policy is about global warming", "Donald Trump is the president of United States", "UCLA is the best school in southern California", "Noor Nakhaei is going to be student at UCLA", "the Boelter Hall is a dungeon", "UCLA is collaborating with Stanford", "Wenhao is meeting Trump", "Trump is in United Kingdom"]
        self.news_list = self.read.read_csv_file("./data/mixed-news/articles-title_only.csv")
        self.graph = Graph(self.news_list)
        self.words = self.graph.get_words()
        self.entities = self.graph.get_entities()
        self.ee_graph = EE(self.news_list)
        self.ec_graph = EC(self.news_list)
        self.cc_graph = CC(self.news_list)
        print("cc", self.cc_graph.get_edges())
        self.kg_graph = KG(self.news_list)
        self.d = 10  # THIS SHOULD BE CHANGED! 4, 10, 18
        # Embedding tables for entities (S) and words (T), seeded at random.
        self.S = pd.DataFrame(1, index=self.entities, columns=range(0, self.d))
        self.T = pd.DataFrame(1, index=self.words, columns=range(0, self.d))
        for i in self.S.columns:
            for j in self.S.index:
                self.S.loc[j, i] = randint(0, 10)
        for i in self.T.columns:
            for j in self.T.index:
                self.T.loc[j, i] = randint(0, 10)
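
The two nested loops fill S and T one cell at a time; a vectorised sketch of the same random initialisation (numpy assumed; note that random.randint(0, 10) is inclusive on both ends, so the numpy equivalent is randint(0, 11)):

import numpy as np
import pandas as pd

entities, words, d = ['e1', 'e2'], ['w1', 'w2', 'w3'], 10  # stand-in labels
S = pd.DataFrame(np.random.randint(0, 11, size=(len(entities), d)),
                 index=entities, columns=range(d))
T = pd.DataFrame(np.random.randint(0, 11, size=(len(words), d)),
                 index=words, columns=range(d))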
Example #5
def parse_fun(json_text):
    return parse(Reader(json_text))
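
Here Reader is the project's character stream over the raw JSON text; a minimal sketch of what such a class might provide to a recursive-descent parse function (the real implementation may differ):

class StreamReader:
    """Hypothetical character stream of the kind parse() consumes."""

    def __init__(self, text):
        self._text = text
        self._pos = 0

    def peek(self):
        # Return the current character without consuming it ('' at the end).
        return self._text[self._pos] if self._pos < len(self._text) else ''

    def next(self):
        # Consume and return the current character.
        ch = self.peek()
        self._pos += 1
        return ch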
Example #6
            indexer.createIndex()

    if args.query is not None:
        #if not os.path.isfile(idxfile):
        #    raise Exception("Could not find indexfile: {}".format(idxfile))
        if args.analyzer is None or args.language == 'all':
            raise ValueError(
                "To retrieve a query you must specify analyzer and language")
        searcher = Searcher(index_path=args.index,
                            lang=args.language,
                            analyzer=args.analyzer,
                            dataset=args.dataset)
        searcher.queryTest(args.query)

    if args.run == 'reader':
        reader = Reader()
        reader.run(lang=args.lang,
                   analyzer=args.analyzer,
                   dataset=args.dataset)
    if args.metric == 'dist':
        metrics.hits(dataset=args.dataset,
                     langContext=args.language,
                     langQuestion=args.language,
                     distant=True,
                     k=50)

    if args.metric == 'hit@k':
        metrics.hits(dataset=args.dataset,
                     langContext=args.language,
                     langQuestion=args.language,
                     distant=False,
Example #7
class GUI(QtWidgets.QWidget):

    __reader = Reader()
    __viewer = Viewer()
    __glWidget = glWidget()
    __slider = QtWidgets.QSlider(QtCore.Qt.Horizontal)
    __draws = {
        "Texturing": __viewer.paint_texture,
        "Quad strips, 2*n + 2 vertices": __viewer.paint_quadstrip,
        "Quads, 4*n vertices": __viewer.paint_quads
    }
    __curr_draw = "Texturing"

    def __init__(self, parent=None):
        super().__init__(parent)
        self.__grid = QtWidgets.QGridLayout(self)
        self.__setup_main_widget()

    def __setup_main_widget(self):
        self.__glWidget.render.connect(self.__viewer.paint_texture)
        self.__grid.addWidget(self.__glWidget, 0, 0, 10, 2)

        self.__slider.sliderReleased.connect(self.__connect_value_changed)
        self.__grid.addWidget(self.__slider, 11, 0)
        self.__curr_slider = QtWidgets.QLabel()
        self.__curr_slider.setText("0")
        self.__grid.addWidget(self.__curr_slider, 11, 1)

        button = QtWidgets.QPushButton()
        button.setText("Open Tomogram")
        button.clicked.connect(self.__connect_open_tomogram)
        self.__grid.addWidget(button, 12, 0, 1, 2)

        draw_list = QtWidgets.QComboBox()
        draw_list.addItems(self.__draws.keys())
        draw_list.activated[str].connect(self.__connect_change_draw)
        self.__grid.addWidget(draw_list, 13, 0, 1, 2)

        self.__min_input = QtWidgets.QLineEdit()
        self.__min_input.setText("0")
        self.__grid.addWidget(self.__min_input, 14, 0)
        self.__length_input = QtWidgets.QLineEdit()
        self.__length_input.setText("2000")
        self.__grid.addWidget(self.__length_input, 14, 1)

        button = QtWidgets.QPushButton()
        button.setText("Set transfer parameters")
        button.clicked.connect(self.__connect_transfer_parameters)
        self.__grid.addWidget(button, 15, 0, 1, 2)

        button = QtWidgets.QPushButton()
        button.setText("Start render")
        button.clicked.connect(self.__start_render)
        self.__grid.addWidget(button, 16, 0, 1, 2)

    def __connect_change_draw(self, draw_name: str):
        self.__glWidget.render.disconnect(self.__draws[self.__curr_draw])
        self.__curr_draw = draw_name
        self.__glWidget.render.connect(self.__draws[self.__curr_draw])

    def __connect_value_changed(self):
        value = self.__slider.value()
        self.__curr_slider.setText(str(value))
        self.__viewer.set_layer(value)
        self.__start_render()

    def __connect_transfer_parameters(self):
        min_val, length = int(self.__min_input.text()), int(
            self.__length_input.text())
        self.__viewer.set_transfer_parameters(min_val, length)

    def __connect_open_tomogram(self):
        tomogram_path = QtWidgets.QFileDialog.getOpenFileName(
            self, "Open Tomogram", ".")[0]
        if not tomogram_path:
            return
        shape, tomogram = self.__reader.Read(tomogram_path)
        self.__slider.setRange(0, shape[2] - 1)
        self.__slider.setValue(0)
        self.__curr_slider.setText("0")
        w, h = self.__glWidget.size().width(), self.__glWidget.size().height()
        min_val, length = int(self.__min_input.text()), int(
            self.__length_input.text())
        self.__viewer.set_tomogram(shape, tomogram)
        self.__viewer.set_transfer_parameters(min_val, length)
        self.__viewer.setup_view(w, h)

    def __start_render(self):
        self.__glWidget.update()
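
A hypothetical launcher for this widget, assuming the PyQt5 binding (the excerpt's QtWidgets/QtCore imports are not shown). Because the class body instantiates Qt widgets, the QApplication has to exist before the module defining GUI is imported:

import sys

from PyQt5 import QtWidgets

app = QtWidgets.QApplication(sys.argv)  # must exist before any QWidget

from gui import GUI  # hypothetical module holding the class above

window = GUI()
window.show()
sys.exit(app.exec_())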
Example #8
    def setUp(self):
        # Tokenize the fixture TEXT once and tag it with the reader's
        # st tagger before each test.
        self.reader = Reader()
        self.tokenized_text = word_tokenize(TEXT)
        self.classified_text = self.reader.st.tag(self.tokenized_text)
Example #9
def runf1(conn, args):
    # evaluation dataset
    # english context so that answer is in english
    data = MLQADataset(args.dataset, 'en', args.langQuestion)

    # initialize searcher
    init(conn, 'wiki', args)

    # initialise reader
    print("Reader")
    reader = Reader(model="models/distilbert-base-uncased-distilled-squad/",
                    tokenizer="models/distilbert-uncased-my-tok")

    # initialise translator
    print("Translator")
    languages = {args.langQuestion, args.langSearch, 'en'}
    translator = Translator(languages)
    print("Translating between: {}".format(str(languages)))
    counters = {'f1': [], 'tally': 0, 'score': []}

    for doc in data.get():
        questionSearch = translator(doc['question'], args.langQuestion,
                                    args.langSearch)
        #print("questionSearch ", questionSearch.encode('utf-8'))
        search(conn, questionSearch, args.langSearch)

        if args.langSearch == 'en':
            questionRead = questionSearch
        else:
            questionRead = translator(doc['question'], args.langQuestion, 'en')
        #print("questionRead ", questionRead.encode('utf-8'))
        # recv = {'search':[{'id':qid, 'docs':[{'context':'...', 'title':'...', 'score':score}]}]
        bestScore = 0
        bestAnswer, bestContext = '', ''
        recv = recvall(conn)
        for n, docSearch in enumerate(recv['search'][0]['docs']):
            # reader answer question given contexts
            #print("n: ", n)
            #print("contextSearch ", docSearch['context'].encode('utf-8'))
            contextRead = translator(docSearch['context'], args.langSearch,
                                     'en')
            #print("contextRead ", contextRead.encode('utf-8'))
            _, answerRead, score = reader(questionRead, contextRead)
            if score >= bestScore:
                bestScore = score
                bestAnswer = answerRead
                bestContext = contextRead

        #print("goldAnswer: ",doc['answer'].encode('utf-8'))
        #print("Answer:     ",bestAnswer.encode('utf-8'))
        counters['f1'].append(f1_drqa(bestAnswer, doc['answer']))
        counters['tally'] += 1
        counters['score'].append(bestScore)
        # test
        if args.stop != 0 and counters['tally'] >= args.stop:
            print("Stoping at: ", counters['tally'])
            break
        #if i > 1:
        #    break

    f1 = np.array(counters['f1'])
    exact_match = f1[f1 == 1.0].sum() / f1.size
    print("Exact match: {}".format(exact_match))
    print("F1 mean: {}".format(f1.mean()))
    print("Mean score: {}".format(sum(counters['score']) / counters['tally']))
    print("Total: {}".format(counters['tally']))
    if args.save_as:
        print("Writing to: ", args.save_as)
        with open(args.save_as, "w") as fp:
            json.dump(counters, fp)

    close(conn, args.stop_server)

    return f1.mean()
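
f1_drqa is the project's answer-overlap metric; as a reference, the DrQA/SQuAD-style token F1 it presumably follows looks like this (real implementations also normalise case, punctuation, and articles before comparing):

from collections import Counter

def token_f1(prediction, gold):
    pred_tokens = prediction.split()
    gold_tokens = gold.split()
    # Count tokens that appear in both answers (with multiplicity).
    common = Counter(pred_tokens) & Counter(gold_tokens)
    overlap = sum(common.values())
    if overlap == 0:
        return 0.0
    precision = overlap / len(pred_tokens)
    recall = overlap / len(gold_tokens)
    return 2 * precision * recall / (precision + recall)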
Example #10
def read(args):
    """reader function"""
    db_file = args.wiki_db_file
    reader_feature_file = args.reader_feature_file
    reader_example_file = args.reader_example_file
    encoder_ck_file = args.reader_encoder_ck_file
    downstream_ck_file = args.reader_downstream_ck_file
    albert_model_path = args.albert_model_path
    reader_result_file = args.reader_result_file
    seed = args.seed
    sp_threshold = args.sp_threshold
    seq_len = args.seq_len
    batch_size = args.reader_batch_size
    para_limit = args.max_para_num
    sent_limit = args.max_sent_num

    random.seed(seed)
    np.random.seed(seed)

    t1 = time()

    doc_db = DocDB(db_file)

    generator = DataGenerator(feature_file_path=reader_feature_file,
                              example_file_path=reader_example_file,
                              batch_size=batch_size,
                              seq_len=seq_len,
                              para_limit=para_limit,
                              sent_limit=sent_limit,
                              task_type="reader")
    example_dict = generator.example_dict
    feature_dict = generator.feature_dict
    answer_dict = defaultdict(lambda: defaultdict(list))
    new_answer_dict = {}
    total_sp_dict = defaultdict(list)
    new_total_sp_dict = defaultdict(list)

    tokenizer = AlbertTokenizer.from_pretrained(albert_model_path)
    new_tokens = ['[q]', '[/q]', '<t>', '</t>', '[s]']
    tokenizer.add_tokens(new_tokens)

    reader = Reader(batch_size=batch_size,
                    encoder_ck_file=encoder_ck_file,
                    downstream_ck_file=downstream_ck_file)

    print("start reading ...")

    for _, batch in tqdm(enumerate(generator)):
        input_ids = Tensor(batch["context_idxs"], mstype.int32)
        attn_mask = Tensor(batch["context_mask"], mstype.int32)
        token_type_ids = Tensor(batch["segment_idxs"], mstype.int32)
        context_mask = Tensor(batch["context_mask"], mstype.float32)
        square_mask = Tensor(batch["square_mask"], mstype.float32)
        packing_mask = Tensor(batch["query_mapping"], mstype.float32)
        para_start_mapping = Tensor(batch["para_start_mapping"],
                                    mstype.float32)
        sent_end_mapping = Tensor(batch["sent_end_mapping"], mstype.float32)
        unique_ids = batch["unique_ids"]
        sent_names = batch["sent_names"]
        cache_mask = Tensor(
            np.tril(np.triu(np.ones((seq_len, seq_len)), 0), 30),
            mstype.float32)

        _, _, q_type, _, sent_logit, y1, y2 = reader(
            input_ids, attn_mask, token_type_ids, context_mask, square_mask,
            packing_mask, cache_mask, para_start_mapping, sent_end_mapping)

        type_prob = ops.Softmax()(q_type).asnumpy()

        answer_dict_ = convert_to_tokens(example_dict, feature_dict,
                                         batch['ids'],
                                         y1.asnumpy().tolist(),
                                         y2.asnumpy().tolist(),
                                         type_prob, tokenizer,
                                         sent_logit.asnumpy(), sent_names,
                                         unique_ids)
        for q_id in answer_dict_:
            answer_dict[q_id] = answer_dict_[q_id]

    for q_id in answer_dict:
        res = answer_dict[q_id]
        answer_text_ = res[0]
        sent_ = res[1]
        sent_names_ = res[2]
        new_answer_dict[q_id] = answer_text_

        predict_support_np = ops.Sigmoid()(Tensor(sent_,
                                                  mstype.float32)).asnumpy()

        for j in range(predict_support_np.shape[0]):
            if j >= len(sent_names_):
                break
            if predict_support_np[j] > sp_threshold:
                total_sp_dict[q_id].append(sent_names_[j])

    for _id in total_sp_dict:
        _sent_names = total_sp_dict[_id]
        for para in _sent_names:
            title = make_wiki_id(para[0], 0)
            para_original_title = doc_db.get_doc_info(title)[-1]
            para[0] = para_original_title
            new_total_sp_dict[_id].append(para)

    prediction = {'answer': new_answer_dict, 'sp': new_total_sp_dict}

    with open(reader_result_file, 'w') as f:
        json.dump(prediction, f, indent=4)

    t2 = time()

    print(f"reader cost time: {t2-t1} s")
Example #11
def json_decode(json_text):
    return parse(Reader(json_text))
Example #12
import os

from src.log_simulator import LogSimulator
from time import time
from queue import Queue

# Reader, Displayer and AlertSystem are project classes; their imports
# are omitted in this excerpt.

DIR_NAME = os.path.dirname(os.path.abspath(__file__))

if __name__ == '__main__':

    read_line_queue = Queue()
    traffic_queue = Queue()
    alert_content = {
        'type': AlertSystem.ALERT_RECOVER_TYPE,
        'to_display': False
    }

    reader = Reader(DIR_NAME + '/data/access-log.log', read_line_queue,
                    traffic_queue)
    displayer = Displayer(read_line_queue, alert_content, 10, True)
    alert_system = AlertSystem(80, traffic_queue, alert_content, 120)
    log_simulator = LogSimulator(DIR_NAME + '/data/access-log.log',
                                 'localhost', ['/', '/section1'])

    current_time = time()

    log_simulator.start()
    reader.start()
    displayer.start()
    alert_system.start()

    while time() - current_time <= 120:
        log_simulator.resume()
        reader.resume()
Example #13
    def __init__(self, f, start, goal):
        self.reader = Reader(f)
        # start and goal arrive as "x,y" strings; store them as int tuples.
        self.start = tuple(map(int, start.split(',')))
        self.goal = tuple(map(int, goal.split(',')))
        self.expanded = []
Example #14
if __name__ == '__main__':

    try:

        config = ConfigLoader(DIR_NAME + '/config.ini')
        parameters = config.configure_threads()

        read_line_queue = Queue()
        traffic_queue = Queue()
        alert_content = {
            'type': AlertSystem.ALERT_RECOVER_TYPE,
            'to_display': False
        }

        reader = Reader(input_queue=read_line_queue,
                        input_traffic_queue=traffic_queue,
                        **parameters['reader'])
        displayer = Displayer(output_queue=read_line_queue,
                              alert_content=alert_content,
                              **parameters['displayer'])
        alert_system = AlertSystem(output_traffic_queue=traffic_queue,
                                   alert_content=alert_content,
                                   **parameters['alert_system'])

        has_simulator = False
        log_simulator = None

        if parameters.get('log_simulator') is not None:
            log_simulator = LogSimulator(**parameters['log_simulator'])
            has_simulator = True