Example #1
 def test_read_words(self):
     words = ["This", "is", "words,", "Anthony!"]
     with tempfile.TemporaryFile(mode="w+") as tmp:
         tmp.write(" ".join(words))
         tmp.seek(0)
         reader = Reader(tmp)
         self.assertEqual(words, [w for w in reader.read_words()])
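A minimal Reader satisfying this test only needs to wrap a file-like object and yield whitespace-separated tokens; here is a sketch under that assumption (the real class may do more):

class Reader:
    def __init__(self, source):
        self._source = source  # any file-like object opened for reading

    def read_words(self):
        # yield whitespace-separated tokens, line by line
        for line in self._source:
            yield from line.split()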
Example #2
class TestReader(unittest.TestCase):
    def setUp(self):
        self.reader = Reader()
        self.tokenized_text = word_tokenize(TEXT)
        self.classified_text = self.reader.st.tag(self.tokenized_text)

    def test_init(self):
        assert TEST1 == self.classified_text

    def test_read_files(self):
        self.lst_news = self.reader.read_files("data/bbc")
        self.assertFalse(len(self.reader.file_names) == 0)
        self.assertTrue(os.access(self.reader.file_names[0], os.R_OK))

    def test_parse_news(self):
        self.lst_news = self.reader.read_files("data/bbc")
        # test on a subset of news articles, e.g. 10 files
        res = self.reader.parse_news(self.lst_news[:10])

    def test_filter_stop_words(self):
        example = [
            'This', 'is', 'a', 'sample', 'sentence', ',', 'showing', 'off',
            'the', 'stop', 'words', 'filtration', '.'
        ]
        res = self.reader.filter_stop_words(example)
        print(res)
        assert res == [
            'sample', 'sentence', 'showing', 'stop', 'words', 'filtration'
        ]

    def test_stem_words(self):
        example = ['game', 'gaming', 'gamed', 'games']
        res = self.reader.stem_words(example)
        assert res == ['game']
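The st tagger used in setUp is constructed elsewhere; given the word_tokenize call and the PERSON/LOCATION/ORGANIZATION labels used across these examples, NLTK's Stanford NER wrapper is a plausible choice. A hypothetical construction (both paths are placeholders, and Java plus the Stanford NER files are required at runtime):

from nltk.tag import StanfordNERTagger
from nltk.tokenize import word_tokenize

st = StanfordNERTagger(
    'english.all.3class.distsim.crf.ser.gz',  # placeholder model path
    'stanford-ner.jar')                       # placeholder jar path
print(st.tag(word_tokenize("Trump visited UCLA")))  # [(word, label), ...]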
Example #3
    def setUp(self):
        self.transition = [[0, 2, 3], [3, 0, 4], [3, 4, 0]]
        self.books = [
            Book(0, 1, 5),
            Book(1, 0, 3),
            Book(2, 1, 10),
            Book(3, 1, 2),
            Book(4, 2, 8),
        ]
        self.readers = [
            Reader({0: 3, 1: 2}, 0, 10),
            Reader({1: 1, 2: 10, 3: 3}, 0, 10),
            Reader({0: 1, 1: 1, 2: 1, 3: 1, 4: 1}, 1, 10),
            Reader({2: 5, 4: 5}, 2, 10),
        ]

        self.state = State(self.transition, self.books, self.readers)
Example #4
 def test_invalid_feed(self):
     # Assume
     value = 5
     # Action
     reader = Reader()
     # Assert
     with self.assertRaises(TypeError):
         reader.feed(value)
Example #5
 def setUp(self):
     self.book_data = {0: 5, 1: 2, 3: 3}
     self.reader = Reader(books=self.book_data, location=1, max_weeks=6)
     self.books = [
         Book(0, 1, 3),
         Book(1, 0, 5),
         Book(2, 1, 4),
         Book(3, 1, 2)
     ]
Example #6
 def setUp(self):
     self.book_data = {0: 5, 1: 2, 3: 3}
     self.reader = Reader(books=self.book_data, location=1, max_weeks=6)
     self.books = [
         Book(0, 3, 1),
         Book(1, 5, 0),
         Book(2, 4, 1),
         Book(3, 2, 1)
     ]
Example #7
    def __init__(self, lst_news, window_size=5):
        """Inits Graph
        Args:
            lst_news: list of string. list of news articles.
            window_size: int. size of the co-occurrence sliding window.
        """
        self.window_size = window_size
        self.news = list(lst_news)
        self.reader = Reader()
        self.search = Search()

        self.nodes = self.__create_nodes()
        self.edges = self.__create_edges()
        self.edge_weights = self.__create_weights()
Example #8
    def _build_bohr(self):
        self.bohr = TaskBohr()
        reader = Reader(self.file)

        words = deque()

        for index, word in enumerate(reader.read_words()):
            words.append((index, word))
            if len(words) >= self.MAX_KEY_SIZE:
                self._add_word(words)
                words.popleft()

        while len(words) > 0:
            self._add_word(words)
            words.popleft()
Example #9
 def test_reader_creator(self):
     """
     Create reader test
     """
     res = create_reader(self.line.split()[1:], 12)
     expected = (2, Reader({0: 1, 1: 1, 2: 1, 3: 1, 4: 1}, 1, 12))
     self.assertEqual(expected, res)
Example #10
    def post(self, *args, **kwargs):
        _file = self.request.files.get('file')
        _money = "0.00"

        if _file:
            _money = Reader.getMoney(_file[0])

        self.write(json.dumps({"money": _money, "time": int(time.time())}))
Example #11
    def test_input_parser(self):
        ip = InputParser()
        for line in self.input_str.split("\n"):
            ip.parse_line(line)

        st = ip.get_state()
        trans = [[0, 2, 3], [3, 0, 4], [3, 4, 0]]
        books = [
            Book(0, 5, 0),
            Book(1, 3, 0),
            Book(2, 10, 1),
            Book(3, 2, 2),
            Book(4, 8, 2),
        ]
        readers = [
            Reader({0: 10, 1: 2}, 0, 32),
            Reader({1: 1, 2: 10, 3: 3}, 0, 32),
            Reader({0: 1, 1: 1, 2: 1, 3: 1, 4: 1}, 1, 32),
            Reader({2: 5, 4: 5}, 2, 32),
        ]
        expected = State(trans, books, readers)
        self.assertEqual(trans, st._transition)
        self.assertEqual(books, st._books)
        self.assertEqual(readers, st._readers)
        self.assertEqual(0, st._score)
        self.assertEqual(expected, st)
Example #12
    def nyan_filter(self, status):
        token = Token()
        reader = Reader()
        api = token.get_key(reader.json_dir())

        print(status.text)
        text = status.text

        # for nyan in nyan_list:
        for nyan in open('./dictionary.txt', 'r'):
            nyan = nyan.replace('\n', '')
            print(nyan)
            if nyan in text:
                print("OUT!! Delete Tweet!! Nyan Nyan Filter Start Up!!")
                for tweet in tweepy.Cursor(api.user_timeline).items():
                    api.destroy_status(tweet.id)
                    break
                # Tweets "Nyan-nyan filter activated!!" plus a timestamp
                api.update_status("にゃんにゃんフィルター発動!!\n" + datetime.now().strftime("%Y/%m/%d %H:%M:%S"))
            else:
                print("No problem!!")
Example #13
 def get_data(date_range: DateGen, places: dict):
     reader = Reader()
     for date in date_range:
         for country, cities in places.items():
             for city in cities:
                 reader.get_webpage(date, country, city)
                 reader.append_file("warsaw.csv")
Example #14
        def __init__(self):
          self.read = Reader()
          # self.news_list = ["Today's policy is about global warming", "Donald Trump is the president of United States", "UCLA is the best school in southern California", "Noor Nakhaei is going to be student at UCLA", "the Boelter Hall is a dungeon", "UCLA is collaborating with Stanford", "Wenhao is meeting Trump", "Trump is in United Kingdom"]
          self.news_list = self.read.read_csv_file("./data/mixed-news/articles-title_only.csv")
          self.graph = Graph(self.news_list)
          self.words = self.graph.get_words()
          self.entities = self.graph.get_entities()
          self.ee_graph = EE(self.news_list)
          self.ec_graph = EC(self.news_list)
          self.cc_graph = CC(self.news_list)
          print("cc", self.cc_graph.get_edges())
          self.kg_graph = KG(self.news_list)
          self.d = 10  # THIS SHOULD BE CHANGED! candidate values: 4, 10, 18
          self.S = pd.DataFrame(1, index=self.entities, columns=range(0, self.d))
          self.T = pd.DataFrame(1, index=self.words, columns=range(0, self.d))
          for i in self.S.columns:
            for j in self.S.index:
              self.S[i][j] = randint(0, 10)
          for i in self.T.columns:
            for j in self.T.index:
              self.T[i][j] = randint(0, 10)
Example #15
class TestReader(unittest.TestCase):

    reader = Reader('unit-test', Queue(), Queue())

    def test_parse_log_line(self):
        fixture_line = '199.72.81.55 - - [01/Jul/1995:00:00:01 -0400] "GET /history/apollo/ HTTP/1.0" 200 6245'
        formatted_line = {
            'remote_host': '199.72.81.55',
            'user_identity': '-',
            'user_name': '-',
            'datetime': datetime(1995, 7, 1, 4, 0, 1),
            'request': 'GET /history/apollo/ HTTP/1.0',
            'status_code': 200,
            'response_size': 6245,
            'section': '/history'
        }
        self.assertEqual(formatted_line, self.reader.parse_log_line(fixture_line))

        fixture_line = '199.72.81.55 - jeremy [01/Jul/1995:00:01:43 +0700] "GET / HTTP/1.0" 200 7074'
        formatted_line = {
            'remote_host': '199.72.81.55',
            'user_identity': '-',
            'user_name': 'jeremy',
            'datetime': datetime(1995, 6, 30, 17, 1, 43),
            'request': 'GET / HTTP/1.0',
            'status_code': 200,
            'response_size': 7074,
            'section': '/'
        }
        self.assertEqual(formatted_line, self.reader.parse_log_line(fixture_line))

        fixture_line = '199.72.81.55 [01/Jul/1995:00:01:43 +0700] "GET / HTTP/1.0" 200'
        self.assertRaises(LineFormatError, lambda: self.reader.parse_log_line(fixture_line))

    def test_get_section(self):
        self.assertEqual('/history', self.reader.get_section('GET /history/apollo/ HTTP/1.0'))
        self.assertEqual('/major-history', self.reader.get_section('GET /major-history/apollo/ HTTP/1.0'))
        self.assertEqual('/minor.history', self.reader.get_section('GET /minor.history/apollo/ HTTP/1.0'))
        self.assertEqual('/', self.reader.get_section('GET /history.php HTTP/1.0'))
        self.assertEqual('/', self.reader.get_section('GET / HTTP/1.0'))
        self.assertRaises(LineFormatError, lambda: self.reader.get_section('test test'))

    def test_parse_datetime(self):
        self.assertEqual(datetime(2006, 12, 7, 18, 23, 54), self.reader.parse_datetime('07/Dec/2006:14:23:54 -0400'))
        self.assertRaises(IndexError, lambda: self.reader.parse_datetime('07/Dec/2006:14:23:54'))
        self.assertRaises(ValueError, lambda: self.reader.parse_datetime('Test test'))
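The expectations above pin down the datetime handling: the timezone offset is applied to yield a naive UTC datetime, a missing offset raises IndexError, and garbage raises ValueError. A minimal sketch consistent with those tests (an assumption, not the project's actual code):

from datetime import datetime, timedelta

def parse_datetime_utc(raw):
    parts = raw.split(' ')
    naive = datetime.strptime(parts[0], '%d/%b/%Y:%H:%M:%S')  # ValueError on garbage
    offset = parts[1]                                         # IndexError if offset missing
    sign = 1 if offset[0] == '+' else -1
    shift = timedelta(hours=int(offset[1:3]), minutes=int(offset[3:5]))
    return naive - sign * shift  # normalize to UTC

parse_datetime_utc('07/Dec/2006:14:23:54 -0400')  # datetime(2006, 12, 7, 18, 23, 54)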
Example #16
for i, _file in enumerate(categories):
    print "  " + str(i + 1) + ".", _file

print()

category = -1
while category < 0 or category > len(categories):
    category = int(input("Enter the category number: "))

if category != 0:
    category -= 1
    category_dir = join(DIRECTORY, categories[category])

    if len(sys.argv) > 1:
        _id = sys.argv[1]
        random_puzzle = _id + ".krk"
    else:
        cat_files = [f for f in listdir(category_dir) if isfile(join(category_dir, f))]
        random_puzzle = choice(cat_files)

    reader = Reader(join(category_dir, random_puzzle))
    puzzle = KRPuzzle(reader.get_level())

    try:
        puzzle.start()
    except Exception as e:
        print(e)
        puzzle.end()

    # puzzle.output()
Example #17
 def setUp(self):
     self.reader = Reader()
     self.tokenized_text = word_tokenize(TEXT)
     self.classified_text = self.reader.st.tag(self.tokenized_text)
Example #18
def run(fp):
    reader = Reader(fp)
    reader.read_file()
Example #19
 def __init__(self):
     cmd.Cmd.__init__(self)
     self.prompt = "wayterm > "
     self.reader = Reader()
Example #20
if __name__ == '__main__':

    try:

        config = ConfigLoader(DIR_NAME + '/config.ini')
        parameters = config.configure_threads()

        read_line_queue = Queue()
        traffic_queue = Queue()
        alert_content = {
            'type': AlertSystem.ALERT_RECOVER_TYPE,
            'to_display': False
        }

        reader = Reader(input_queue=read_line_queue,
                        input_traffic_queue=traffic_queue,
                        **parameters['reader'])
        displayer = Displayer(output_queue=read_line_queue,
                              alert_content=alert_content,
                              **parameters['displayer'])
        alert_system = AlertSystem(output_traffic_queue=traffic_queue,
                                   alert_content=alert_content,
                                   **parameters['alert_system'])

        has_simulator = False
        log_simulator = None

        if parameters.get('log_simulator') is not None:
            log_simulator = LogSimulator(**parameters['log_simulator'])
            has_simulator = True
Example #21
class GUI(QtWidgets.QWidget):

    __reader = Reader()
    __viewer = Viewer()
    __glWidget = glWidget()
    __slider = QtWidgets.QSlider(QtCore.Qt.Horizontal)
    __draws = {
        "Текстурирование": __viewer.paint_texture,  # "Texturing"
        "Прямоугольники 2*n + 2 вершин": __viewer.paint_quadstrip,  # "Rectangles, 2*n + 2 vertices"
        "Прямоугольники 4*n вершин": __viewer.paint_quads  # "Rectangles, 4*n vertices"
    }
    __curr_draw = "Текстурирование"

    def __init__(self, parent=None):
        super().__init__(parent)
        self.__grid = QtWidgets.QGridLayout(self)
        self.__setup_main_widget()

    def __setup_main_widget(self):
        self.__glWidget.render.connect(self.__viewer.paint_texture)
        self.__grid.addWidget(self.__glWidget, 0, 0, 10, 2)

        self.__slider.sliderReleased.connect(self.__connect_value_changed)
        self.__grid.addWidget(self.__slider, 11, 0)
        self.__curr_slider = QtWidgets.QLabel()
        self.__curr_slider.setText("0")
        self.__grid.addWidget(self.__curr_slider, 11, 1)

        button = QtWidgets.QPushButton()
        button.setText("Open Tomogram")
        button.clicked.connect(self.__connect_open_tomogram)
        self.__grid.addWidget(button, 12, 0, 1, 2)

        draw_list = QtWidgets.QComboBox()
        draw_list.addItems(self.__draws.keys())
        draw_list.activated[str].connect(self.__connect_change_draw)
        self.__grid.addWidget(draw_list, 13, 0, 1, 2)

        self.__min_input = QtWidgets.QLineEdit()
        self.__min_input.setText("0")
        self.__grid.addWidget(self.__min_input, 14, 0)
        self.__length_input = QtWidgets.QLineEdit()
        self.__length_input.setText("2000")
        self.__grid.addWidget(self.__length_input, 14, 1)

        button = QtWidgets.QPushButton()
        button.setText("Set transfer parameters")
        button.clicked.connect(self.__connect_transfer_parameters)
        self.__grid.addWidget(button, 15, 0, 1, 2)

        button = QtWidgets.QPushButton()
        button.setText("Start render")
        button.clicked.connect(self.__start_render)
        self.__grid.addWidget(button, 16, 0, 1, 2)

    def __connect_change_draw(self, draw_name: str):
        self.__glWidget.render.disconnect(self.__draws[self.__curr_draw])
        self.__curr_draw = draw_name
        self.__glWidget.render.connect(self.__draws[self.__curr_draw])

    def __connect_value_changed(self):
        value = self.__slider.value()
        self.__curr_slider.setText(str(value))
        self.__viewer.set_layer(value)
        self.__start_render()

    def __connect_transfer_parameters(self):
        min_val, length = int(self.__min_input.text()), int(
            self.__length_input.text())
        self.__viewer.set_transfer_parameters(min_val, length)

    def __connect_open_tomogram(self):
        tomogram_path = QtWidgets.QFileDialog.getOpenFileName(
            self, "Open Tomogram", ".")[0]
        if not tomogram_path: return
        shape, tomogram = self.__reader.Read(tomogram_path)
        self.__slider.setRange(0, shape[2] - 1)
        self.__slider.setValue(0)
        self.__curr_slider.setText("0")
        w, h = self.__glWidget.size().width(), self.__glWidget.size().height()
        min_val, length = int(self.__min_input.text()), int(
            self.__length_input.text())
        self.__viewer.set_tomogram(shape, tomogram)
        self.__viewer.set_transfer_parameters(min_val, length)
        self.__viewer.setup_view(w, h)

    def __start_render(self):
        self.__glWidget.update()
Example #22
def runf1(conn, args):
    # evaluation dataset
    # English context so that the answer is in English
    data = MLQADataset(args.dataset, 'en', args.langQuestion)

    # initialize searcher
    init(conn, 'wiki', args)

    # initialise reader
    print("Reader")
    reader = Reader(model="models/distilbert-base-uncased-distilled-squad/",
                    tokenizer="models/distilbert-uncased-my-tok")

    # initialise translator
    print("Translator")
    languages = {args.langQuestion, args.langSearch, 'en'}
    translator = Translator(languages)
    print("Translating between: {}".format(str(languages)))
    counters = {'f1': [], 'tally': 0, 'score': []}

    for doc in data.get():
        questionSearch = translator(doc['question'], args.langQuestion,
                                    args.langSearch)
        #print("questionSearch ", questionSearch.encode('utf-8'))
        search(conn, questionSearch, args.langSearch)

        if args.langSearch == 'en':
            questionRead = questionSearch
        else:
            questionRead = translator(doc['question'], args.langQuestion, 'en')
        #print("questionRead ", questionRead.encode('utf-8'))
        # recv = {'search':[{'id':qid, 'docs':[{'context':'...', 'title':'...', 'score':score}]}]
        bestScore = 0
        recv = recvall(conn)
        for n, docSearch in enumerate(recv['search'][0]['docs']):
            # reader answer question given contexts
            #print("n: ", n)
            #print("contextSearch ", docSearch['context'].encode('utf-8'))
            contextRead = translator(docSearch['context'], args.langSearch,
                                     'en')
            #print("contextRead ", contextRead.encode('utf-8'))
            _, answerRead, score = reader(questionRead, contextRead)
            if score >= bestScore:
                bestScore = score
                bestAnswer = answerRead
                bestContext = contextRead

        #print("goldAnswer: ",doc['answer'].encode('utf-8'))
        #print("Answer:     ",bestAnswer.encode('utf-8'))
        counters['f1'].append(f1_drqa(bestAnswer, doc['answer']))
        counters['tally'] += 1
        counters['score'].append(bestScore)
        # test
        if args.stop != 0 and counters['tally'] >= args.stop:
            print("Stoping at: ", counters['tally'])
            break
        #if i > 1:
        #    break

    f1 = np.array(counters['f1'])
    exact_match = f1[f1 == 1.0].sum() / f1.size
    print("Exact match: {}".format(exact_match))
    print("F1 mean: {}".format(f1.mean()))
    print("Mean score: {}".format(sum(counters['score']) / counters['tally']))
    print("Total: {}".format(counters['tally']))
    if args.save_as:
        print("Writing to: ", args.save_as)
        with open(args.save_as, "w") as fp:
            json.dump(counters, fp)

    close(conn, args.stop_server)

    return f1.mean()
Example #23
def read(args):
    """reader function"""
    db_file = args.wiki_db_file
    reader_feature_file = args.reader_feature_file
    reader_example_file = args.reader_example_file
    encoder_ck_file = args.reader_encoder_ck_file
    downstream_ck_file = args.reader_downstream_ck_file
    albert_model_path = args.albert_model_path
    reader_result_file = args.reader_result_file
    seed = args.seed
    sp_threshold = args.sp_threshold
    seq_len = args.seq_len
    batch_size = args.reader_batch_size
    para_limit = args.max_para_num
    sent_limit = args.max_sent_num

    random.seed(seed)
    np.random.seed(seed)

    t1 = time()

    doc_db = DocDB(db_file)

    generator = DataGenerator(feature_file_path=reader_feature_file,
                              example_file_path=reader_example_file,
                              batch_size=batch_size,
                              seq_len=seq_len,
                              para_limit=para_limit,
                              sent_limit=sent_limit,
                              task_type="reader")
    example_dict = generator.example_dict
    feature_dict = generator.feature_dict
    answer_dict = defaultdict(lambda: defaultdict(list))
    new_answer_dict = {}
    total_sp_dict = defaultdict(list)
    new_total_sp_dict = defaultdict(list)

    tokenizer = AlbertTokenizer.from_pretrained(albert_model_path)
    new_tokens = ['[q]', '[/q]', '<t>', '</t>', '[s]']
    tokenizer.add_tokens(new_tokens)

    reader = Reader(batch_size=batch_size,
                    encoder_ck_file=encoder_ck_file,
                    downstream_ck_file=downstream_ck_file)

    print("start reading ...")

    for _, batch in tqdm(enumerate(generator)):
        input_ids = Tensor(batch["context_idxs"], mstype.int32)
        attn_mask = Tensor(batch["context_mask"], mstype.int32)
        token_type_ids = Tensor(batch["segment_idxs"], mstype.int32)
        context_mask = Tensor(batch["context_mask"], mstype.float32)
        square_mask = Tensor(batch["square_mask"], mstype.float32)
        packing_mask = Tensor(batch["query_mapping"], mstype.float32)
        para_start_mapping = Tensor(batch["para_start_mapping"],
                                    mstype.float32)
        sent_end_mapping = Tensor(batch["sent_end_mapping"], mstype.float32)
        unique_ids = batch["unique_ids"]
        sent_names = batch["sent_names"]
        cache_mask = Tensor(
            np.tril(np.triu(np.ones((seq_len, seq_len)), 0), 30),
            mstype.float32)

        _, _, q_type, _, sent_logit, y1, y2 = reader(
            input_ids, attn_mask, token_type_ids, context_mask, square_mask,
            packing_mask, cache_mask, para_start_mapping, sent_end_mapping)

        type_prob = ops.Softmax()(q_type).asnumpy()

        answer_dict_ = convert_to_tokens(example_dict, feature_dict,
                                         batch['ids'],
                                         y1.asnumpy().tolist(),
                                         y2.asnumpy().tolist(),
                                         type_prob, tokenizer,
                                         sent_logit.asnumpy(), sent_names,
                                         unique_ids)
        for q_id in answer_dict_:
            answer_dict[q_id] = answer_dict_[q_id]

    for q_id in answer_dict:
        res = answer_dict[q_id]
        answer_text_ = res[0]
        sent_ = res[1]
        sent_names_ = res[2]
        new_answer_dict[q_id] = answer_text_

        predict_support_np = ops.Sigmoid()(Tensor(sent_,
                                                  mstype.float32)).asnumpy()

        for j in range(predict_support_np.shape[0]):
            if j >= len(sent_names_):
                break
            if predict_support_np[j] > sp_threshold:
                total_sp_dict[q_id].append(sent_names_[j])

    for _id in total_sp_dict:
        _sent_names = total_sp_dict[_id]
        for para in _sent_names:
            title = make_wiki_id(para[0], 0)
            para_original_title = doc_db.get_doc_info(title)[-1]
            para[0] = para_original_title
            new_total_sp_dict[_id].append(para)

    prediction = {'answer': new_answer_dict, 'sp': new_total_sp_dict}

    with open(reader_result_file, 'w') as f:
        json.dump(prediction, f, indent=4)

    t2 = time()

    print(f"reader cost time: {t2-t1} s")
Example #24
def json_decode(json_text):
    return parse(Reader(json_text))
Example #25
class ReaderTest(unittest.TestCase):
    """
    Reader Test
    """
    def setUp(self):
        self.book_data = {0: 5, 1: 2, 3: 3}
        self.reader = Reader(books=self.book_data, location=1, max_weeks=6)
        self.books = [
            Book(0, 3, 1),
            Book(1, 5, 0),
            Book(2, 4, 1),
            Book(3, 2, 1)
        ]

    def test_creation(self):
        """
        Tests the creation of a reader.
        """
        self.assertEqual(self.book_data, self.reader._books)
        self.assertEqual(1, self.reader._location)
        self.assertEqual([0, 0, 0, 0, 0, 0], self.reader._timing)

    def test_read(self):
        """
        Test simple read book 0.
        """
        self.reader._timing = [3, 2, 2, 1, 1, 1]
        score, time = self.reader.read(self.books[0], 0, 3)
        self.assertTrue(self.books[0].id_book not in self.reader._books)
        self.assertEqual(3, score)
        self.assertEqual(6, time)
        self.assertEqual([3, 3, 3, 2, 2, 2], self.reader._timing)
        score, time = self.reader.read(self.books[3], 4, 5)
        self.assertTrue(self.books[3].id_book not in self.reader._books)
        self.assertEqual(0, score)
        self.assertEqual(7, time)
        self.assertEqual([3, 3, 3, 2, 3, 3], self.reader._timing)

    def test_another_read(self):
        """
        Test complex read book 0.
        """
        self.reader._timing = [3, 2, 3, 3, 1, 1]
        score, time = self.reader.read(self.books[0], 0, 6)
        self.assertTrue(self.books[0].id_book not in self.reader._books)
        self.assertEqual(0, score)
        self.assertEqual(9, time)

    def test_read_not_interested(self):
        """
        Test that the Reader rejects a book it is not interested in.
        """
        # Note: self.fail() raises AssertionError itself, so a
        # try/except AssertionError around it would swallow the failure;
        # assertRaises expresses the intent correctly.
        with self.assertRaises(AssertionError):
            self.reader.read(self.books[2], 0, 0)

    def test_read_different_location(self):
        """
        Test that the Reader rejects a book held at a different location.
        """
        with self.assertRaises(AssertionError):
            self.reader.read(self.books[1], 0, 1)
Example #26
        # now, try from the right to the left.
        mark_group_i = len(row)
        for i, group in reversed(list(enumerate(groups))):
            if i in used_groups:
                continue

            group_type = types[i]

            if group_type == self.NULL:
                # if the type is self.NULL quit trying.
                break

            elif group_type == self.MARK:
                mark_group_i -= 1

                if mark_group_i >= 0 and row[mark_group_i] == group:
                    indices.add(mark_group_i)

        return indices

if __name__ == '__main__':
    if len(sys.argv) == 1:
        reader = Reader("levels/lambda.krk")
    else:
        reader = Reader(sys.argv[1])

    _level = reader.get_level()

    puzzle = KRPuzzle(_level)
    puzzle.start()
Example #27
class Graph(object):
    """
    Base class which represents the heterogeneous textual graph (undirected graph). G = <V, E>. V is the set of nodes (objects), including 3 types of objects (i.e. new entities, known entities, and contextual words). Entities are words (with label: PERSON, LOCATION, or ORGANIZATION) whereas contextual words are the remaining uni-gram words. New entities are the entities not in DBpedia, and known entities are the entities in DBpedia. E is a set of edges (co-occurrences) of entity-entity, entity-word, and word-word pairs. Words within every 5-word sliding window in a news sentence are considered to be co-occurring with each other. The weights are represented by an adjacency matrix stored as a dataframe.

    Attributes:
        nodes: dictionary of nodes {"N (new entity)": [(word, label)], "K (known entity)": [(word, label)], "C (contextual word)": [(word, label)]} in the graph; includes 3 types of objects (i.e. new entities, known entities, and contextual words).
        edges: set of tuples, e.g. ("A", "B") indicates a link between node "A" and node "B".
        weights: adjacency matrix of edge weights, stored as a dataframe.
        news: list of news articles (articles are string type).
    """
    def __init__(self, lst_news, window_size=5):
        """Inits Graph
        Args:
            lst_news: list of string. list of news articles.
            window_size: int. size of the co-occurrence sliding window.
        """
        self.window_size = window_size
        self.news = list(lst_news)
        self.reader = Reader()
        self.search = Search()

        self.nodes = self.__create_nodes()
        self.edges = self.__create_edges()
        self.edge_weights = self.__create_weights()

    def __create_nodes(self):
        """Private class method
        Takes in a list of news articles (articles are string types):
        1) tokenize the articles
        2) remove stopwords
        3) label words with 3 labels (i.e. PERSON, ORGANIZATION, LOCATION)
        4) Match entities (i.e. person, org, loc) against DBpedia

        Returns:
            Returns a dictionary containing 3 types of objects (i.e. new entities, known entities, and contextual words). E.g. {"N": [("Washington", "LOCATION")], "K": [("Trump", "PERSON"), ("Hua Wei", "ORGANIZATION")], "C": [("the", "O"), ("am", "O")]}
        """

        # parse news articles
        tagged_words = self.reader.parse_news(self.news)
        # separate entities from contextual words
        entities, cwords = self.__entities_words(tagged_words)
        new_e, known_e = self.search.query(entities)

        ret = dict()
        ret["N"] = list(set(new_e))
        ret["K"] = list(set(known_e))
        ret["C"] = list(set(cwords))

        return dict(ret)

    def get_nodes(self):
        """
        Getter method which returns all nodes from self.nodes.
        """
        ret = set()
        for i in self.nodes["N"]:
            ret.add(i[0])
        for i in self.nodes["K"]:
            ret.add(i[0])
        for i in self.nodes["C"]:
            ret.add(i[0])
        return list(ret)

    def get_entities(self):
        """
        Getter method which returns a list of entities (i.e. word tagged with "PERSON", "LOCATION", "ORGANIZATION") from self.nodes.
        """
        ret = set()
        for i in self.nodes["N"]:
            ret.add(i[0])
        for i in self.nodes["K"]:
            ret.add(i[0])
        return list(ret)

    def get_words(self):
        """
        Getter method which returns a list of contextual words from self.nodes.
        """
        ret = set()
        for i in self.nodes["C"]:
            ret.add(i[0])
        return list(ret)

    def __create_edges(self, window_size=5):
        """Private class method
        Takes in a list of news articles and extracts the co-occurrence links between nodes. Nodes within a 5-word sliding window in a news sentence are considered to be co-occurring with each other. The frequencies of nodes co-appearing in news sentences serve as the weights of these links.

        Returns:
            Returns a set of links between nodes. 
        """
        e = set()
        for article in self.news:
            self.tokenized_text = word_tokenize(article)
            self.tokenized_text = self.reader.filter_stop_words(
                self.tokenized_text)
            generator = self.sliding_window(self.tokenized_text,
                                            self.window_size)
            for t in generator:
                e = e.union(set(itertools.combinations(t, 2)))
        return set(e)

    def get_edges(self):
        """
        Getter method which returns a set of edges from self.edges.
        """
        return set(self.edges)

    def sliding_window(self, seq, n=5):
        """
        Returns a sliding window (of width n) over data from the iterable
           s -> (s0,s1,...s[n-1]), (s1,s2,...,sn), ...

        Args:
            seq: list of words; This is one news article split into a list of words.
            n: int; size of the sliding window

        Returns:
            An iterator contains all the sliced window. See the test case in `tests/test_graph.py` for more details.
        """
        it = iter(seq)
        result = tuple(islice(it, n))
        if len(result) <= n:
            yield result
        for elem in it:
            result = result[1:] + (elem, )
            yield result

    def __create_weights(self):
        """Private class method
        Create weights matrix using pandas dataframe. The value at the ith row and jth column is the count of (undirected) links between node i and node j.
        
        Returns:
            Return a copy of dataframe representing the weights matrix.
        """
        words = self.get_nodes()
        df = pd.DataFrame(index=words, columns=words).fillna(0)

        for article in self.news:
            self.tokenized_text = word_tokenize(article)
            self.tokenized_text = self.reader.filter_stop_words(
                self.tokenized_text)
            generator = self.sliding_window(self.tokenized_text,
                                            self.window_size)

            for t in generator:
                for tup in set(itertools.combinations(t, 2)):
                    if tup[0] != tup[1]:
                        df.loc[tup[0], tup[1]] += 1
                        df.loc[tup[1], tup[0]] += 1

        return df.copy()

    def get_weights(self):
        return self.edge_weights.copy()

    def __entities_words(self, tagged_words):
        """Private class method
        Separate the entity words from the contextual words.

        Args:
            tagged_words: list of tuples (word, label)

        Returns:
            entities: words tagged with "PERSON", "LOCATION", "ORGANIZATION".
            cwords: words tagged with "O" 
        """
        entities = list()
        cwords = list()
        for word in tagged_words:
            if word[1] == "O":
                # contextual words
                cwords.append(word)
            else:
                entities.append(word)
        assert len(entities) + len(cwords) == len(tagged_words)
        return entities, cwords

    def update_weight(self, e, w):
        """
        Update the edge weight in the internal weight matrix.
        Args:
            e: tuple; a tuple contains two nodes, e.g. ("A", "B")
            w: int; The new weight associated with e
        """
        if e[0] not in set(self.get_nodes()):
            raise ValueError("Node {} is not in the graph".format(str(e[0])))
        if e[1] not in set(self.get_nodes()):
            raise ValueError("Node {} is not in the graph".format(str(e[1])))

        if e in self.edges and w <= 0:
            self.edge_weights.loc[e[0], e[1]] = w
            self.edge_weights.loc[e[1], e[0]] = w
            self.edges.remove(e)
            self.edges.remove((e[1], e[0]))
        elif e in self.edges and w > 0:
            self.edge_weights.loc[e[0], e[1]] = w
            self.edge_weights.loc[e[1], e[0]] = w
        else:
            self.edge_weights.loc[e[0], e[1]] = w
            self.edge_weights.loc[e[1], e[0]] = w
            self.edges.add(e)
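Graph itself needs Reader and Search to instantiate, so here is the sliding_window logic lifted out verbatim as a standalone function to show what it yields:

from itertools import islice

def sliding_window(seq, n=5):
    # same logic as Graph.sliding_window above
    it = iter(seq)
    result = tuple(islice(it, n))
    if len(result) <= n:
        yield result
    for elem in it:
        result = result[1:] + (elem, )
        yield result

print(list(sliding_window(["a", "b", "c", "d", "e"], 3)))
# [('a', 'b', 'c'), ('b', 'c', 'd'), ('c', 'd', 'e')]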
Example #28
 def __init__(self, f, start, goal):
     self.reader = Reader(f)
     self.start = tuple(map(int, start.split(',')))
     self.goal = tuple(map(int, goal.split(',')))
     self.expanded = []
Example #29
class Embedding(object):
        """
        Python class which produces the joint embedding of the words and entities.
        To use it: E = Embedding(), entity_embedding, word_embedding = E.joint_embedding()
        Attributes:
        kg_graph: The knowledge Graph
        ee_graph: A heterogeneous subgraph of HEER, showing relations between entities
        cc_graph: A heterogeneous subgraph of HEER, showing relations between words
        ec_graph: A bipartite subgraph of HEER, showing relations between entities and words
        """
        def __init__(self):
          self.read = Reader()
          # self.news_list = ["Today's policy is about global warming", "Donald Trump is the president of United States", "UCLA is the best school in southern California", "Noor Nakhaei is going to be student at UCLA", "the Boelter Hall is a dungeon", "UCLA is collaborating with Stanford", "Wenhao is meeting Trump", "Trump is in United Kingdom"]
          self.news_list = self.read.read_csv_file("./data/mixed-news/articles-title_only.csv")
          self.graph = Graph(self.news_list)
          self.words = self.graph.get_words()
          self.entities = self.graph.get_entities()
          self.ee_graph = EE(self.news_list)
          self.ec_graph = EC(self.news_list)
          self.cc_graph = CC(self.news_list)
          print("cc", self.cc_graph.get_edges())
          self.kg_graph = KG(self.news_list)
          self.d = 10  # THIS SHOULD BE CHANGED! candidate values: 4, 10, 18
          self.S = pd.DataFrame(1, index=self.entities, columns=range(0, self.d))
          self.T = pd.DataFrame(1, index=self.words, columns=range(0, self.d))
          for i in self.S.columns:
            for j in self.S.index:
              self.S[i][j] = randint(0, 10)
          for i in self.T.columns:
            for j in self.T.index:
              self.T[i][j] = randint(0, 10)

        def weighted_sample(self, items, n):
          """
          This function samples an item, proportional to it's weight attribute.
          Args:
              items: the list of edges we should choose between them.
              n: number of edges we should choose.
          Returns:
              Yields the chosen edge, proportional to it' weight
          """
          total = 0
          for j in items:
            total = float(sum(w for a, b, w in items))
              
          i = 0
          a, b, w = items[0]
          while n:
              x = total * (1 - random.random() ** (1.0 / n))
              total -= x
              while x > w:
                  x -= w
                  i += 1
                  a, b, w = items[i]
              w -= x
              yield a, b
              n -= 1

        def embedding_update(self, s, t, g, k=3):
          """
          This function updates the embeddings of words and entities.
          Args:
              s: A binary number, indicating the type of embedding that should be updated.
              t: A binary number, indicating the type of embedding that should be updated.
              g: The graph; It could be the ee, cc, or ec subgraph, or the kg graph.
              k: Number of negative edges.

          """
          eta = 0.2       
          # Sample an edge from G and draw k negative edges
          # and I guess, when we sample an edge, we also update that node's weight in the embedding!
          # So for sampling I should have all the weights,
          df = g.get_weights()
          num_cols = g.get_nodes()
          edges = []
          for i in num_cols:
            for j in num_cols:
              if df[i][j] != 0:
                edge = []
                edge.append(i)
                edge.append(j)
                edge.append(df[i][j])
                edges.append(edge)
          sampled_edge = self.weighted_sample(edges, 1)

          for el in sampled_edge:
            sampled_node_a = el[0]
            sampled_node_b = el[1]

          #swap!
          if s == 1 and t == 1:
              print(sampled_node_a)
              print(sampled_node_b)
              if sampled_node_a in self.S.index:
                  s1 = sampled_node_b
                  sampled_node_b = sampled_node_a
                  sampled_node_a = s1
          # sampled_neg_nodes = []
          if s == 1 and t == 1:
              nodes = g.get_entities()
          else:
              nodes = g.get_nodes()
          # draw k negative edges!
          sampled_neg_nodes = random.sample(nodes, k) # [k]
          sampled_neg_nodes.append(sampled_node_b)

          #so up until here, we have k negative edges, one positive edge, the graph, and S_t, T_t
          if s == 1 and t == 1:  # S, T, G_ec
              sum = 0
              for i in range(k+1):
                  a = np.dot(self.S.loc[sampled_neg_nodes[i]], self.T.loc[sampled_node_a])
                  if a > 123:
                    a = 123
                  elif a < 0.1:
                    a = 0.5
                  b = np.exp(a)
                  sum = sum + b
              c = np.log(sum)
              d = self.S.loc[sampled_node_b].T
              e = - eta * d * c
              self.T.loc[sampled_node_a] = self.T.loc[sampled_node_a] - e
              sum = 0
              for i in range(k+1):
                  a = np.dot(self.S.loc[sampled_neg_nodes[i]], self.T.loc[sampled_node_a])
                  if a > 123:
                    a = 123
                  elif a < 0.1:
                    a = 0.5
                  b = np.exp(a)
                  sum = sum + b
              c = np.log(sum)
              d = self.T.loc[sampled_node_a].T

              e = - eta * d * c
              self.S.loc[sampled_node_b] = self.S.loc[sampled_node_b] - e
          elif s == 0 and t == 1:  # T, T, G_cc
              sum = 0
              for i in range(k+1):
                  a = np.dot(self.T.loc[sampled_neg_nodes[i]], self.T.loc[sampled_node_a])
                  if a > 123:
                    a = 123
                  elif a < 0.1:
                    a = 0.5
                  b = np.exp(a)
                  sum = sum + b
              c = np.log(sum)
              d = self.T.loc[sampled_node_b].T
              e = - eta * d * c
              self.T.loc[sampled_node_a] = self.T.loc[sampled_node_a] - e
          elif s == 1 and t == 0:  # S, S, G_ee
              sum = 0
              for i in range(k+1):
                  a = np.dot(self.S.loc[sampled_neg_nodes[i]], self.S.loc[sampled_node_a])
                  if a > 123:
                    a = 123
                  elif a < 0.1:
                    a = 0.5
                  b = np.exp(a)
                  sum = sum + b
              c = np.log(sum)
              d = self.S.loc[sampled_node_a].T
              e = - eta * d * c
              self.S.loc[sampled_node_b] = self.S.loc[sampled_node_b] - e

        def joint_embedding(self):
          """
          This function runs the iteration to minimize the cost function, and calls the update function.
          Attributes:
              theta: The guiding parameter, chosen empirically. The bigger it is, the more effective the kg graph is.
              k: Number of negative samples.
              t: Number of iterations.
          Returns:
              Returns two dataframes, first the entity embedding (normalized_S) and second the word embedding (normalized_T).

          """
          # the guiding parameter, chosen empirically; the bigger it is, the more we rely on the kg graph.
          theta = 0.5  # THIS SHOULD BE CHANGED! candidate values: 0.2, 0.5, 0.7
          # number of negative samplings
          k = 2
          # number of iterations
          t = 100
          # the loop of the algorithm
          while t > 0:
              gamma = random.uniform(0, 1)
              if gamma <= theta:
                  self.embedding_update(1, 0, self.kg_graph, k)
              else:
                  self.embedding_update(1, 1, self.ec_graph, k)
                  self.embedding_update(1, 0, self.ee_graph, k)
                  self.embedding_update(0, 1, self.cc_graph, k)
              t = t - 1
          normalized_S = self.S.div(self.S.sum(axis=1), axis=0)
          normalized_T = self.T.div(self.T.sum(axis=1), axis=0)
          return normalized_S, normalized_T
Example #30
            indexer.createIndex()

    if args.query is not None:
        #if not os.path.isfile(idxfile):
        #    raise Exception("Could not find indexfile: {}".format(idxfile))
        if args.analyzer is None or args.language == 'all':
            raise ValueError(
                "To retrieve query you must specify analyzer and language")
        searcher = Searcher(index_path=args.index,
                            lang=args.language,
                            analyzer=args.analyzer,
                            dataset=args.dataset)
        searcher.queryTest(args.query)

    if args.run == 'reader':
        reader = Reader()
        reader.run(lang=args.lang,
                   analyzer=args.analyzer,
                   dataset=args.dataset)
    if args.metric == 'dist':
        metrics.hits(dataset=args.dataset,
                     langContext=args.language,
                     langQuestion=args.language,
                     distant=True,
                     k=50)

    if args.metric == 'hit@k':
        metrics.hits(dataset=args.dataset,
                     langContext=args.language,
                     langQuestion=args.language,
                     distant=False,
Example #31
from src.log_simulator import LogSimulator
from time import time
from queue import Queue

DIR_NAME = os.path.dirname(os.path.abspath(__file__))

if __name__ == '__main__':

    read_line_queue = Queue()
    traffic_queue = Queue()
    alert_content = {
        'type': AlertSystem.ALERT_RECOVER_TYPE,
        'to_display': False
    }

    reader = Reader(DIR_NAME + '/data/access-log.log', read_line_queue,
                    traffic_queue)
    displayer = Displayer(read_line_queue, alert_content, 10, True)
    alert_system = AlertSystem(80, traffic_queue, alert_content, 120)
    log_simulator = LogSimulator(DIR_NAME + '/data/access-log.log',
                                 'localhost', ['/', '/section1'])

    current_time = time()

    log_simulator.start()
    reader.start()
    displayer.start()
    alert_system.start()

    while time() - current_time <= 120:
        log_simulator.resume()
        reader.resume()
Example #32
    def test_success_reader(self):
        # Assume
        html = '<HTML><HEAD><TITLE>Ejemplo 2</TITLE></HEAD><BODY></BODY></HTML>'
        # Action
        reader = Reader()
        reader.feed(html)
        metrics = reader.get_metrics()
        # Assert
        self.assertEqual(metrics['total_elements'], 4)

        # Assume
        html2 = '<HTML><HEAD><IMG /><TITLE>Ejemplo 2</TITLE></HEAD><BODY></BODY></HTML>'
        # Action
        reader2 = Reader()
        reader2.feed(html2)
        metrics2 = reader2.get_metrics()
        # Assert
        self.assertEqual(metrics2['total_elements'], 5)

        # Assume
        html3 = '<HTML><HEAD><BADTAG><TITLE>Ejemplo 2</TITLE></HEAD><BADTAG><BODY></BODY></HTML>'
        # Action
        reader3 = Reader()
        reader3.feed(html3)
        metrics3 = reader3.get_metrics()
        # Assert
        self.assertEqual(metrics3['total_elements'], 4)
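These tests (together with test_invalid_feed in Example #4) suggest a Reader built on html.parser that counts only recognized tags, since <BADTAG> is ignored. A minimal sketch under that assumption (the tag whitelist is hypothetical):

from html.parser import HTMLParser

KNOWN_TAGS = {'html', 'head', 'title', 'body', 'img'}  # hypothetical whitelist

class Reader(HTMLParser):
    def __init__(self):
        super().__init__()
        self._count = 0

    def feed(self, data):
        if not isinstance(data, str):
            raise TypeError('feed() expects a str')  # matches test_invalid_feed
        super().feed(data)

    def handle_starttag(self, tag, attrs):
        # also fires for self-closing tags like <IMG /> via handle_startendtag
        if tag in KNOWN_TAGS:
            self._count += 1

    def get_metrics(self):
        return {'total_elements': self._count}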
Example #33
class Console(cmd.Cmd):

    def __init__(self):
        cmd.Cmd.__init__(self)
        self.prompt = "wayterm > "
        self.reader = Reader()

    def do_hist(self, args):
        """Print a list of commands that have been entered"""
        print(self._hist)

    def do_exit(self, args):
        """Exits from the console"""
        return -1

    def do_EOF(self, args):
        """Exit on system end of file character"""
        return self.do_exit(args)

    def do_shell(self, args):
        """Pass command to a system shell when line begins with '!'"""
        os.system(args)

    def do_help(self, args):
        """Get help on commands
           'help' or '?' with no arguments prints a list of commands for which help is available
           'help <command>' or '? <command>' gives help on <command>
        """
        ## The only reason to define this method is for the help text in the doc string
        self.reader.printfile('help')

    def preloop(self):
        """Initialization before prompting user for commands.
           Despite the claims in the Cmd documentation, Cmd.preloop() is not a stub.
        """
        cmd.Cmd.preloop(self)   ## sets up command completion
        self._hist    = []      ## No history yet
        self._locals  = {}      ## Initialize execution namespace for user
        self._globals = {}

    def postloop(self):
        """Take care of any unfinished business.
           Despite the claims in the Cmd documentation, Cmd.postloop() is not a stub.
        """
        cmd.Cmd.postloop(self)   ## Clean up command completion
        print "Exiting..."

    def precmd(self, line):
        """ This method is called after the line has been input but before
            it has been interpreted. If you want to modify the input line
            before execution (for example, variable substitution) do it here.
        """
        self._hist += [ line.strip() ]
        return line

    def postcmd(self, stop, line):
        """If you want to stop the console, return something that evaluates to true.
           If you want to do some post command processing, do it here.
        """
        return stop

    def emptyline(self):
        """Do nothing on empty input line"""
        pass

    def default(self, line):
        """Called on an input line when the command prefix is not recognized.
           In that case we execute the line as Python code.
        """
        try:
            wayterm.call(line.split('\\'))
        except Exception as e:
            print(e.__class__, ":", e)
Example #34
def parse_fun(json_text):
    return parse(Reader(json_text))
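One plausible shape for the Reader consumed by parse() is a character cursor with peek/advance; a hypothetical sketch (parse itself is not shown in these examples):

class Reader:
    def __init__(self, text):
        self._text = text
        self._pos = 0

    def peek(self):
        # next character without consuming it; '' at end of input
        return self._text[self._pos] if self._pos < len(self._text) else ''

    def advance(self):
        ch = self.peek()
        self._pos += 1
        return ch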
Example #35
#!/usr/bin/python3

from src.reader import Reader
import _thread
import argparse
from generate_logs import generate_logs
from src.constants import DEFAULT_OUTPUT_FILE
import sys
sys.path.append("./src")

if __name__ == '__main__':
    parser = argparse.ArgumentParser(__file__, description="Log Generator")
    parser.add_argument("--generate", "-g", dest="file_generate", help="Output file path", type=str)
    parser.add_argument("--file", "-f", dest="file_read", help="read file path", type=str)
    parser.add_argument("--threshold", "-t", dest="threshold", help="alerting thresholds", type=str)

    args = parser.parse_args()
    file_read = DEFAULT_OUTPUT_FILE if not args.file_read else args.file_read
    file_generate = args.file_generate
    threshold = args.threshold
    r = Reader()

    try:
        _thread.start_new_thread(generate_logs, (file_generate,))
        _thread.start_new_thread(r.read_lines, (file_read,))
    except Exception:
        print("Error: unable to start thread")

    while 1:
        pass