def add_collection(self):
    """Create a two-document test collection, store it and return it.

    The collection address and both document hashes are fresh ``uuid1``
    strings. The collection is inserted through ``self.our_cache``, passed to
    ``collections.update_hash`` and the cache session is committed.

    :return: the newly created ``Collection``
    """
    cache = self.our_cache
    address, first_hash, second_hash = (str(uuid.uuid1()) for _ in range(3))

    test_documents = [
        Document(description="Test document A", hash=first_hash, title="Test A"),
        Document(description="Test document B", hash=second_hash, title="Test B"),
    ]
    new_collection = Collection(
        title="Test",
        description="This is a collection!",
        address=address,
        btc="123456789",
        keywords=[],
        documents=test_documents,
        # Three separate clock reads, one per date field.
        creation_date=datetime.datetime.now(),
        oldest_date=datetime.datetime.now(),
        latest_broadcast_date=datetime.datetime.now(),
    )

    cache.insert_new_collection(new_collection)
    collections.update_hash(new_collection)
    cache.session.commit()
    return new_collection
예제 #2
0
    def remove_item_dict2_test(self):
        """Removing one of two added documents leaves only the other associated."""
        controller = DocumentListController()

        removed = Document()
        kept = Document()
        controller.add(removed)
        controller.add(kept)
        controller.remove(removed)

        # assertEqual replaces the deprecated assertEquals alias (removed in
        # Python 3.12); assertIn/assertNotIn report the values on failure.
        self.assertEqual(1, len(controller.association))
        self.assertIn(kept, controller.association.values())
        self.assertNotIn(removed, controller.association.values())
예제 #3
0
 def setUp(self):
     """Create the controller, keep its cache and build ``test_collection_evil``.

     The collection uses the fixed address ``'ffafaf'`` while its three
     documents carry a different, hard-coded ``collection_address``.
     The unused ``uuid1`` locals of the original were dropped; every value
     used by the fixture is a literal.
     """
     self.controller = Controller()
     self.cache = self.controller.cache
     self.address = 'ffafaf'
     self.test_collection_evil = Collection(
         title="Test multiple33333",
         description="This is a collection! with multiple docs222",
         address=self.address,
         btc="123456789",
         keywords=[
             Keyword(name="Keyword A", id=1199),
             Keyword(name="Keyword c", id=1214),
         ],
         documents=[
             Document(
                 description="Test document Z",
                 hash="zzzzzzzz",
                 title="Test Z",
                 accesses=0,
                 filename="joe.txt",
                 collection_address="BM-2cSrapXpgDTFD8AyDmU1BGifNkB2Z6X9k8"
             ),
             Document(
                 description="Test document B",
                 hash='gdssgsdg',
                 title="Test B",
                 accesses=3,
                 filename="gile.txt",
                 collection_address="BM-2cSrapXpgDTFD8AyDmU1BGifNkB2Z6X9k8"
             ),
             Document(
                 description="Test document Bddd",
                 hash='afff',
                 title="Test B",
                 accesses=3,
                 filename="gile.txt",
                 collection_address="BM-2cSrapXpgDTFD8AyDmU1BGifNkB2Z6X9k8"
             ),
         ],
         creation_date=datetime.datetime.now(),
         oldest_date=datetime.datetime.now(),
         latest_broadcast_date=datetime.datetime.now(),
         latest_btc_tx="btctx1",
         oldest_btc_tx="btctx12",
         accesses=2,
         votes=3,
         votes_last_checked=datetime.datetime.now())
예제 #4
0
    def generate_documents(self, number) -> List[Document]:
        """Build ``2 * number`` documents with consecutive ids starting at 1.

        The first ``number`` documents receive 0 as their second constructor
        argument and the next ``number`` receive 1 (the original comments
        label these NOT_RELEVANT and RELEVANT respectively).

        :param number: how many documents to create for each of the two values
        :return: the created documents, ordered by id
        """
        first_half = [Document(doc_id, 0) for doc_id in range(1, number + 1)]
        second_half = [
            Document(doc_id, 1) for doc_id in range(number + 1, 2 * number + 1)
        ]
        return first_half + second_half
예제 #5
0
    def _build_docs_keywords(self, payload, collection):
        """
        Builds a list of Keyword objects and a list of Document objects from the received json.

        :param payload: The payload of the FJ Message including the documents and keywords
        :return: Two lists representing the documents and keywords of the FJ Message
        """
        for key in payload["keywords"]:
            db_key = self.cache.get_keyword_by_id(key["id"])
            if db_key is not None:
                collection.keywords.append(db_key)
            else:
                collection.keywords.append(Keyword(name=key["name"]))

        for doc in payload["documents"]:
            db_doc = self.cache.get_document_by_hash(doc["hash"])
            if db_doc is not None:
                collection.documents.append(db_doc)
            else:
                collection.documents.append(
                    Document(collection_address=doc["address"],
                             description=doc["description"],
                             hash=doc["hash"],
                             title=doc["title"],
                             filename=doc["filename"],
                             accesses=doc["accesses"]))
예제 #6
0
파일: harvest.py 프로젝트: Cloudoki/fapi
def parse_invoices(entries, account):
    """Create or update a ``Document`` for every invoice entry.

    For each entry the invoice is looked up by uid. A missing invoice is
    created with the invoice type tag and added to ``db``; an existing one has
    its tags replaced by those not found in ``tags`` (only when any remain).
    The invoice fields are then refreshed from the entry, the tag for the
    invoice state is appended, and an open invoice whose due date lies in the
    past also receives the 'due' tag.

    :param entries: iterable of dicts, each holding the invoice under 'invoices'
    :param account: account assigned to newly created invoices
    """
    day_format = '%Y-%m-%d'
    for entry in entries:
        data = entry['invoices']
        invoice = db.query(Document).filter_by(uid=data['id']).first()
        partner = db.query(Partner).filter_by(uid=data['client_id']).first()

        if invoice is not None:
            remaining = list(set(invoice.tags).difference(set(tags.values())))
            if remaining:
                invoice.tags = remaining
        else:
            invoice = Document(uid=data['id'],
                               account=account,
                               tags=[typetags['invoice']])
            db.add(invoice)

        invoice.name = data['subject']
        invoice.value = data['amount']
        invoice.date = datetime.strptime(data['issued_at'], day_format)
        invoice.updated_at = datetime.strptime(data['updated_at'],
                                               '%Y-%m-%dT%H:%M:%SZ')
        invoice.meta = json.dumps(data)
        invoice.partner = partner
        invoice.tags.append(tags[data['state']])

        if data['state'] != 'open':
            continue
        if datetime.strptime(data['due_at'], day_format) < datetime.now():
            invoice.tags.append(tags['due'])
예제 #7
0
 def load_docs(self):
     """
     Load all of the collection's processed documents into this instance.

     A cache file at ``pickle/<name>_docs.p`` is used when it exists.
     Otherwise the text files of corpus directories 0-9 are read, processed
     (``get_content`` then ``process_document``), appended to
     ``self.documents`` and written to that cache file.
     ``self.number_of_docs`` is updated in both cases.
     """
     pickle_path = f"pickle/{self.name}_docs.p"
     try:
         # NOTE(review): unpickling can execute arbitrary code; this assumes
         # the cache file was written by this program — confirm.
         with open(pickle_path, "rb") as cache_file:
             self.documents = load(cache_file)
         self.number_of_docs = len(self.documents)
     except FileNotFoundError:
         number_document_loaded = 0
         for id_directory in range(10):
             print(f"Loading directory {id_directory}")
             path_directory = self.path_to_corpus + str(id_directory)
             for text_file in listdir(path_directory):
                 # create a document instance
                 document = Document(
                     id_doc=number_document_loaded,
                     id_folder=id_directory,
                     address=text_file,
                 )
                 # load data and process documents (filter, remove stopwords and lemmatize)
                 document.get_content(self.path_to_corpus)
                 document.process_document(stopwords_list=self.stopwords,
                                           lemmatizer=self.lemmatizer)
                 self.documents.append(document)
                 number_document_loaded += 1
         makedirs(path.dirname(pickle_path), exist_ok=True)
         # Context manager guarantees the cache is flushed and closed.
         with open(pickle_path, "wb") as cache_file:
             dump(self.documents, cache_file)
         self.number_of_docs = number_document_loaded
예제 #8
0
def parse_document(record, account, typestring):
    """Create or update the ``Document`` described by ``record``.

    The document is looked up by uid. A missing document is built from the
    whole record, given the account and the type tag, and added to ``db``.
    An existing one has its tags replaced by those not found in ``tags``
    (only when any remain) and its name, value and date refreshed.
    In both cases the timestamp, encoded meta, partner and state tag are
    set, and an open document whose due date lies in the past also receives
    the 'due' tag.

    :param record: dict with 'uid', 'name', 'value', 'date' and a 'meta' dict
    :param account: account assigned to a newly created document
    :param typestring: key into ``typetags`` for a newly created document
    """
    document = db.query(Document).filter_by(uid=record['uid']).first()
    meta = record['meta']
    partner = get_or_create(Partner, name=meta['client_name'])

    if document is not None:
        remaining = list(set(document.tags).difference(set(tags.values())))
        if remaining:
            document.tags = remaining

        for field in ('name', 'value', 'date'):
            setattr(document, field, record[field])
    else:
        document = Document(**record)
        document.account = account
        document.tags = [typetags[typestring]]
        db.add(document)

    document.updated_at = datetime.now()
    document.meta = CustomJSONEncoder().encode(meta)
    document.partner = partner
    document.tags.append(tags[meta['state']])

    if meta['state'] == 'open':
        if meta['due_at'] < datetime.now():
            document.tags.append(tags['due'])
예제 #9
0
def convert_to_objects(a_paths, corpus, encoding, train_size):
    """Parse the first ``train_size`` annotation files into ``Document`` objects.

    The parser and the text-path derivation are chosen from ``corpus``:
    'MADE-1.0' uses ``parse_xml``, 'corpus_release' uses ``parse_brat``.
    Fictive relations from ``get_fictive_relations`` are appended to the
    parsed references of every document.

    :param a_paths: sequence of annotation file paths
    :param corpus: corpus identifier, searched for the supported corpus names
    :param encoding: encoding passed to the parsers and the relation helper
    :param train_size: number of leading paths to convert
    :return: list of ``Document`` objects, one per converted path
    :raises ValueError: if a path has to be converted for an unsupported
        corpus (previously this surfaced as an ``UnboundLocalError``)
    """
    docs = []
    for annotation_path in a_paths[:train_size]:
        if 'MADE-1.0' in corpus:
            e_list, r_list = parse_xml(annotation_path, encoding)
            text_path = annotation_path.replace(
                'annotations', 'corpus').replace('.bioc.xml', '')
        elif 'corpus_release' in corpus:
            e_list, r_list = parse_brat(annotation_path, encoding)
            # NOTE(review): replaces every 'ann' substring, not only the
            # file extension — confirm against the corpus directory layout.
            text_path = annotation_path.replace('ann', 'txt')
        else:
            raise ValueError(
                "Unsupported corpus %r: expected a name containing "
                "'MADE-1.0' or 'corpus_release'" % (corpus,))
        fictive_relations = get_fictive_relations(
            e_list, r_list, text_path, encoding)
        docs.append(Document(
            entities=e_list,
            references=r_list + fictive_relations,
            annotation_path=annotation_path,
            text_path=text_path,
        ))
    return docs
def add_collection():
    """Create a one-document test collection, store it and return it.

    The collection address, BTC value and document hash are fresh ``uuid1``
    strings. The document's ``collection_address`` now carries the
    collection's own address; the original passed an unrelated uuid there
    and left the prepared ``coll_address`` unused.
    The collection is inserted through the module-level ``our_cache``,
    passed to ``collections.update_hash`` and the session is committed.

    :return: the newly created ``Collection``
    """
    coll_address = str(uuid.uuid1())
    doc_hash = str(uuid.uuid1())
    coll = Collection(
            title="Test",
            description="This is a collection!",
            address=coll_address,
            btc=str(uuid.uuid1()),
            keywords=[
            ],
            documents=[
                Document(
                    collection_address=coll_address,
                    description="Test document A",
                    hash=doc_hash,
                    title="Test A",
                    ),
            ],
            creation_date=datetime.datetime.now(),
            oldest_date=datetime.datetime.now(),
            latest_broadcast_date=datetime.datetime.now()
    )
    # our_cache is only read here, so no ``global`` declaration is needed.
    our_cache.insert_new_collection(coll)
    collections.update_hash(coll)
    our_cache.session.commit()
    return coll
예제 #11
0
def put_document(file_path, collection_address, title, description):
    """ Insert a document into the local cache with associated information
        and upload the document to the freenet network.

        The file is uploaded through ``FreenetConnection.put``; the returned
        URI is used as the document hash and, with the original extension
        appended, as the name of the local copy.

        :param file_path: the path of the file to upload
        :param collection_address: the collection address associated with the document
        :param title: the title of the document being uploaded
        :param description: the description of the document being uploaded
    """
    file_name = os.path.basename(file_path)
    # Read inside a context manager so the handle is closed even on error.
    with open(file_path) as source_file:
        contents = source_file.read()
    freeCon = FreenetConnection()
    uri = freeCon.put(contents)
    _, extension = os.path.splitext(file_name)
    new_file_name = uri + extension
    # NOTE(review): plain concatenation assumes DOCUMENT_DIRECTORY_PATH ends
    # with a path separator — confirm against the config.
    shutil.copy(file_path, os.path.expanduser(config.DOCUMENT_DIRECTORY_PATH) + new_file_name)
    document = Document(
        collection_address = collection_address,
        description = description,
        hash = uri,
        title = title,
        filename = new_file_name,
        accesses = 0
    )
    cache.insert_new_document(document)
    collection = cache.get_collection_with_address(collection_address)
    collections.update_hash(collection)
    print ("Inserted " + file_path + " successfully with URI " + uri)
    print ("Allow up to 10 minutes for file to propogate on the freenet network")
예제 #12
0
    def search(self, query) -> Documents:
        """Run a boosted multi-match query and collect the hits that are not excluded.

        A hit is skipped when its title contains an entry of
        ``TITLE_EXCLUDES``, its category contains an entry of ``CAT_EXCL`` or
        its text contains ``REFER_TEXT``. Kept hits are added to the result
        with their score and position in the response; a table of the kept
        hits and the number of skipped hits are logged.

        :param query: the query text for the multi_match search
        :return: ``Documents`` holding one ``Document`` per kept hit
        """
        boosted_fields = [
            'title^' + str(ELASTIC_TITLE_BOOST),
            'text^' + str(ELASTIC_TEXT_BOOST),
        ]
        request = Search(using=self.client, index=INDEX_NAME)
        request = request.query("multi_match", query=query, fields=boosted_fields)
        response = request.execute()

        table = PrettyTable(['Index', 'Title', 'Score', 'Popularity'])
        docs = Documents()
        skip_count = 0
        for idx, doc in enumerate(response):
            excluded = (
                any(excl in doc.title for excl in TITLE_EXCLUDES)
                or any(excl in doc.category for excl in CAT_EXCL)
                or REFER_TEXT in doc.text
            )
            if excluded:
                skip_count += 1
                continue

            # round-trip title and text through utf-8
            doc.title = str.encode(doc.title, encoding='utf-8').decode(encoding='utf-8')
            doc.text = str.encode(doc.text, encoding='utf-8').decode(encoding='utf-8')

            docs.add(Document(doc.title, doc.text, doc.meta.score, idx))
            table.add_row([
                idx,
                doc.title,
                '{0:.2f}'.format(doc.meta.score),
                doc.popularity_score,
            ])
        Logger.info('Elastic result:\n' + str(table))
        Logger.info(str(skip_count) + ' elastic results were skipped')
        return docs
예제 #13
0
    def remove_item_dict_test(self):
        """Removing the only added document leaves the association empty."""
        controller = DocumentListController()

        document = Document()
        controller.add(document)
        controller.remove(document)

        # assertEqual replaces the deprecated assertEquals alias (removed in Python 3.12).
        self.assertEqual(0, len(controller.association))
예제 #14
0
    def get_item_from_document_test(self):
        """The item returned for a document maps back to that document."""
        controller = DocumentListController()

        document = Document()
        controller.add(document)

        item = controller.get_item_from_document(document)
        # assertEqual replaces the deprecated assertEquals alias (removed in Python 3.12).
        self.assertEqual(document, controller.association[item])
예제 #15
0
    def test_open_blank_file(self):
        """Opening an empty file sets the document path and an empty text."""
        document = Document()
        # Create (or truncate) the file; the context manager closes it.
        with open("test_file", "w"):
            pass
        document.open("test_file")

        # assertEqual replaces the deprecated assertEquals alias (removed in Python 3.12).
        self.assertEqual("test_file", document.path)
        self.assertEqual("", document.text)
예제 #16
0
    def setUp(self):
        """Create the controller, an address and the ``test_collection`` fixture.

        The address comes from ``controller.connection.create_address``; the
        two documents get fresh ``uuid1`` hashes and hard-coded collection
        addresses. A ``Signature`` for the same address is stored as
        ``test_signature``. The unused ``coll_address`` and ``doc_hash_3``
        locals of the original were dropped.
        """
        self.controller = Controller()
        self.address = self.controller.connection.create_address('Controller Test address', True)

        doc_hash_1 = str(uuid.uuid1())
        doc_hash_2 = str(uuid.uuid1())

        self.test_collection = Collection(
            title="Test",
            description="This is a collection!",
            address=self.address,
            btc="123456789",
            keywords=[
                Keyword(name="Keyword A"),
                Keyword(name="Keyword B"),
            ],
            documents=[
                Document(
                    description="Test document A",
                    hash=doc_hash_1,
                    title="Test A",
                    accesses=0,
                    filename="joe.txt",
                    collection_address="afgagahhsgh"
                    ),
                Document(
                    description="Test document B",
                    hash=doc_hash_2,
                    title="Test B",
                    accesses=3,
                    filename="gile.txt",
                    collection_address="afgagasghhhss"
                    ),
            ],
            creation_date=datetime.datetime.now(),
            oldest_date=datetime.datetime.now(),
            latest_broadcast_date=datetime.datetime.now(),
            latest_btc_tx="btctx1",
            oldest_btc_tx="btctx12",
            accesses=2,
            votes=3,
            votes_last_checked=datetime.datetime.now()
        )
        self.test_signature = Signature(pubkey='itsakey',address=self.address)
예제 #17
0
    def test_text_save_file(self):
        """Saving a document writes its text to the file at its path."""
        document = Document()
        document.text = "this is only a test of save file"
        document.path = "test_file"

        document.save()

        # Read back through a context manager so the handle is closed;
        # assertEqual replaces the deprecated assertEquals alias.
        with open(document.path, "r") as text_file:
            self.assertEqual(document.text, text_file.read())
예제 #18
0
    def test_save_inexistent_file(self):
        """After saving, the file exists and the document text is unchanged."""
        document = Document()
        document.text = "this is only a test of save file"
        document.path = "test_file"

        document.save()

        self.assertTrue(os.path.exists(document.path))
        # assertEqual replaces the deprecated assertEquals alias (removed in Python 3.12).
        self.assertEqual("this is only a test of save file", document.text)
예제 #19
0
    def test_open_text_file(self):
        """Opening a file loads its content and records its path."""
        document = Document()
        # The context manager closes the file before the document reads it.
        with open("test_file", "w") as text_file:
            text_file.write("this is only a test")
        document.open("test_file")

        # assertEqual replaces the deprecated assertEquals alias (removed in Python 3.12).
        self.assertEqual("test_file", document.path)
        self.assertEqual("this is only a test", document.text)
예제 #20
0
    def add_dict_test(self):
        """Adding a document associates it with a ``QStandardItem`` key."""
        controller = DocumentListController()
        document = Document()

        controller.add(document)

        # Materialise keys/values as lists: dict views cannot be indexed on
        # Python 3, while list() behaves the same on Python 2.
        items = list(controller.association.keys())
        documents = list(controller.association.values())

        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual(document, documents[0])
        self.assertIsInstance(items[0], QtGui.QStandardItem)
예제 #21
0
    def add_item_name_test(self):
        """The item created for a document shows the base name of its path."""
        controller = DocumentListController()
        document = Document()
        document.path = "/path/to/test.tf"

        controller.add(document)
        # list() keeps the indexing valid on Python 3, where keys() is a view.
        item = list(controller.association.keys())[0]

        # assertEqual replaces the deprecated assertEquals alias (removed in Python 3.12).
        self.assertEqual("test.tf", item.text())
예제 #22
0
    def remove_item_return_test(self):
        """``remove`` returns the item that was associated with the document."""
        controller = DocumentListController()

        document = Document()
        controller.add(document)

        # Capture the item before removal; list() keeps the indexing valid on
        # Python 3, where items() is a view.
        document_item = list(controller.association.items())[0][0]
        removed_item = controller.remove(document)

        # assertEqual replaces the deprecated assertEquals alias (removed in Python 3.12).
        self.assertEqual(document_item, removed_item)
예제 #23
0
    def test__get_revision_by_timestamp(self):
        """Looking up a revision by its timestamp returns that revision."""
        when = datetime(2020, 1, 1, 1, 1, 1)
        expected = Revision(
            id=1, content='hello', timestamp=when, document_id=5)
        document = Document(id=5, title='blah', revisions=[expected])

        found = document.get_revision_by_timestamp(when)

        assert found == expected
예제 #24
0
 def test_different_root_hash(self):
     """Two document inserts produce collection versions with different root hashes."""
     d = Document(
         description="Test document A",
         hash="asdfasdfa;sldkfja;sldkfja;dljkfa;ldf",
         collection_address="bm-first",
         title="Test A",
     )
     d2 = Document(
         description="Test document B",
         hash="fdasdfsdfsdfsdfsdfsdfsdfdfsdfsddfdfdf",
         collection_address="bm-first",
         title="Test B",
     )
     # Insert each document and refresh the collection hash after it.
     for document in (d, d2):
         self.cache.insert_new_document(document)
         collections.update_hash(self.collection1)
     versions = self.cache.get_versions_for_collection(
         self.collection1.address)
     # assertNotEqual reports both hashes on failure, unlike assertTrue(a != b).
     self.assertNotEqual(versions[0].root_hash, versions[1].root_hash)
예제 #25
0
    def change_filename_test(self):
        """Changing a document's filename updates the text of its item."""
        controller = DocumentListController()

        document = Document()
        document.path = "/path/to/othertest.tf"

        controller.add(document)
        # list() keeps the indexing valid on Python 3, where keys() is a view.
        item = list(controller.association.keys())[0]
        controller.change_filename(document, "/new/file/name.tf")

        # assertEqual replaces the deprecated assertEquals alias (removed in Python 3.12).
        self.assertEqual("name.tf", item.text())
예제 #26
0
    def test_replace_text_save_file(self):
        """Saving a document overwrites the previous content of its file."""
        with open("test_file", "w") as text_file:
            text_file.write("this is only a test file")

        document = Document()
        document.text = "I changed the text"
        document.path = "test_file"

        document.save()

        # Read back through a context manager so the handle is closed;
        # assertEqual replaces the deprecated assertEquals alias.
        with open("test_file", "r") as text_file:
            self.assertEqual(document.text, text_file.read())
 def load(self):
     """ Loads a Document from the JSON file at ``self.file_name``.

     Sets ``self.language`` from the file's metadata, builds the features
     and chapters, and returns a new Document with the file's stop words.
     """
     with open(self.file_name, "r", encoding='utf8') as source:
         parsed = json.load(source)
     self.language = parsed["metadata"]["language"]

     features = []
     for entry in parsed["features"]:
         features.append(
             Feature(entry['type'], entry['words'], entry['context'],
                     self.__letters_to_int(entry['letters']),
                     entry['transcription']))

     chapters = self.__load_chapters(parsed["text"])
     return Document(chapters, self.language, features, parsed["stop_words"])
예제 #28
0
 def test_two_doc_insert(self):
     """Two document inserts create exactly two collection versions."""
     d = Document(
         description="Test document A",
         hash="asdfasdfa;sldkfja;sldkfja;dljkfa;ldf",
         collection_address="bm-first",
         title="Test A",
     )
     d2 = Document(
         description="Test document B",
         hash="fdasdfsdfsdfsdfsdfsdfsdfdfsdfsddfdfdf",
         collection_address="bm-first",
         title="Test B",
     )
     # Insert each document and refresh the collection hash after it.
     for document in (d, d2):
         self.cache.insert_new_document(document)
         collections.update_hash(self.collection1)
     versions = self.cache.get_versions_for_collection(
         self.collection1.address)
     # The assertions report the actual count themselves, so the debugging
     # print of the original is no longer needed.
     self.assertGreaterEqual(len(versions), 2, "No new version was created")
     self.assertEqual(2, len(versions))
    def build_document(self, data_dict: dict) -> None:
        """
        Store the content in ``data_dict`` as a new revision of a document.

        The data is validated first. If a document with the given title
        already exists the revision is appended to it; otherwise a new
        Document holding only that revision is created. The document is
        saved through the repository in both cases.

        :param data_dict: dict with the document 'title' and revision 'content'
        """
        self._validate_data(data_dict)

        repository = self.document_repository
        document = repository.get_by_title(data_dict['title'])
        revision = Revision(content=data_dict['content'])

        if not document:
            document = Document(title=data_dict['title'], revisions=[revision])
        else:
            document.revisions.append(revision)

        repository.save(document)
 def test(self):
     """Insert a new document into ``test_coll`` and check its root hash changed.

     ``test_coll`` and ``prev_value`` are not defined in this block; they are
     presumably module-level fixtures — confirm.
     If inserting fails the test returns early, which the original treats
     as "test already ran".
     """
     with our_cache.session.no_autoflush:
         try:
             d = Document(
                 description=str(uuid.uuid4()),
                 hash=str(uuid.uuid4()),
                 collection_address=test_coll.address,
                 title=str(uuid.uuid4()),
             )
             our_cache.insert_new_collection(test_coll)
             our_cache.insert_new_document_in_collection(d, test_coll)
         except Exception:
             # Test already ran. ``Exception`` instead of a bare ``except`` so
             # KeyboardInterrupt and SystemExit are no longer swallowed.
             return True
     collections.update_hash(test_coll)
     curr_value = test_coll.get_latest_collection_version().root_hash
     self.assertNotEqual(str(curr_value),prev_value)