def add_collection(self):
    """Create a two-document test Collection, insert it into the cache,
    recompute its hash, commit the session, and return the collection."""
    cache = self.our_cache
    collection_address = str(uuid.uuid1())
    hash_a = str(uuid.uuid1())
    hash_b = str(uuid.uuid1())
    coll = Collection(
        title="Test",
        description="This is a collection!",
        address=collection_address,
        btc="123456789",
        keywords=[],
        documents=[
            Document(description="Test document A", hash=hash_a, title="Test A"),
            Document(description="Test document B", hash=hash_b, title="Test B"),
        ],
        creation_date=datetime.datetime.now(),
        oldest_date=datetime.datetime.now(),
        latest_broadcast_date=datetime.datetime.now(),
    )
    cache.insert_new_collection(coll)
    collections.update_hash(coll)
    cache.session.commit()
    return coll
class TextEditorController(object):
    """Mediates between the text-editor UI and a single Document."""

    def __init__(self):
        self.document = Document()

    def open(self, path):
        """Open *path* into the managed document.

        Returns a ``(success, document)`` tuple; ``success`` is False
        when the underlying open raises IOError.
        """
        try:
            self.document.open(path)
        except IOError:
            return False, self.document
        return True, self.document

    def save(self, text, path=None):
        """Store *text* (and optionally a new *path*) then persist.

        Returns True on success, False when the save raises IOError.
        """
        self.document.text = text
        if path is not None:
            self.document.path = path
        try:
            self.document.save()
        except IOError:
            return False
        return True
class DocumentTest(unittest.TestCase):
    """Unit tests for the Document model."""

    file_name = u'models/test_data/lorem.json'
    meta = {'title': u'test 稢綌', 'author': u'gorden 胇赲'}
    body = u'In id tristique orci. 痵痽 犵艿邔 疿疶砳 齸圞趲.'
    pre_file_name = file_name + '_PRE.json'
    raw_file_name = file_name

    def setUp(self):
        self.doc = Document(file_name=self.file_name,
                            metadata=self.meta,
                            pre_file_name=self.pre_file_name,
                            raw_file_name=self.raw_file_name)

    def test_clone(self):
        """A clone matches field-for-field and __eq__ notices any change."""
        clone = self.doc.clone()
        self.assertEqual(clone.file_name, self.doc.file_name)
        self.assertEqual(clone.pre_file_name, self.doc.pre_file_name)
        self.assertEqual(clone.raw_file_name, self.doc.raw_file_name)
        self.assertEqual(clone.metadata, self.doc.metadata)
        self.assertEqual(clone.raw_body, self.doc.raw_body)
        self.assertEqual(self.doc, clone)
        # assertFalse (not assertNotEqual) so that __eq__ itself is exercised.
        clone.file_name = u'nope'
        self.assertFalse(self.doc == clone)
        clone.file_name = self.doc.file_name
        clone.metadata = None
        self.assertFalse(self.doc == clone)
        clone.metadata = self.doc.metadata
        clone.raw_file_name = ''
        self.assertFalse(self.doc == clone)
        clone.raw_file_name = self.doc.raw_file_name
        clone.pre_file_name = ''
        self.assertFalse(self.doc == clone)

    def test_to_dict(self):
        """to_dict() mirrors the document fields for json serialization."""
        doc_dict = self.doc.to_dict()
        self.assertEqual(doc_dict['file_name'], self.doc.file_name)
        self.assertEqual(doc_dict['metadata'], self.doc.metadata)
        self.assertEqual(doc_dict['pre_file_name'], self.doc.pre_file_name)
        # TODO check raw

    def test_open(self):
        """from_json rejects files that are not valid Document json."""
        for bad_path in ('models/test_data/invalid.json',
                         'models/test_data/invalid.txt'):
            self.assertRaises(InvalidDocumentException,
                              Document.from_json, bad_path)
def test_open_blank_file(self):
    """Opening an empty file sets the path and yields empty text."""
    document = Document()
    # 'with' guarantees the handle is closed; we only need the empty file.
    with open("test_file", "w"):
        pass
    document.open("test_file")
    # assertEqual: assertEquals is a deprecated alias (removed in Python 3.12).
    self.assertEqual("test_file", document.path)
    self.assertEqual("", document.text)
def test_open_text_file(self):
    """Opening a file with content loads that content as document.text."""
    document = Document()
    # 'with' guarantees the handle is closed after writing the fixture.
    with open("test_file", "w") as text_file:
        text_file.write("this is only a test")
    document.open("test_file")
    # assertEqual: assertEquals is a deprecated alias (removed in Python 3.12).
    self.assertEqual("test_file", document.path)
    self.assertEqual("this is only a test", document.text)
def add_item_name_test(self):
    """Adding a document lists it under its basename."""
    controller = DocumentListController()
    document = Document()
    document.path = "/path/to/test.tf"
    controller.add(document)
    item = controller.association.keys()[0]
    # assertEqual: assertEquals is a deprecated alias (removed in Python 3.12).
    self.assertEqual("test.tf", item.text())
def test_text_save_file(self):
    """save() writes document.text to the file at document.path."""
    document = Document()
    document.text = "this is only a test of save file"
    document.path = "test_file"
    document.save()
    # 'with' closes the read handle (the original leaked it); assertEqual
    # replaces the deprecated assertEquals alias.
    with open(document.path, "r") as text_file:
        self.assertEqual(document.text, text_file.read())
def test_save_inexistent_file(self):
    """save() creates the target file when it does not yet exist."""
    document = Document()
    document.text = "this is only a test of save file"
    document.path = "test_file"
    document.save()
    self.assertTrue(os.path.exists(document.path))
    # assertEqual: assertEquals is a deprecated alias (removed in Python 3.12).
    self.assertEqual("this is only a test of save file", document.text)
def add_item_name2_test(self):
    """Adding a document with another path lists it under that basename."""
    controller = DocumentListController()
    document = Document()
    document.path = "/path/to/othertest.tf"
    controller.add(document)
    item = controller.association.keys()[0]
    # assertEqual: assertEquals is a deprecated alias (removed in Python 3.12).
    self.assertEqual("othertest.tf", item.text())
def parse_invoices(entries, account):
    """Upsert a Document row for each invoice embedded in *entries*.

    NOTE(review): relies on module-level `db`, `tags`, `typetags` and the
    ORM models `Document`/`Partner` — confirm their definitions elsewhere
    in this module.

    :param entries: iterable of dicts, each carrying an 'invoices' payload
    :param account: account assigned to newly created invoice Documents
    """
    for entry in entries:
        i = entry['invoices']
        invoice = db.query(Document).filter_by(uid=i['id']).first()
        partner = db.query(Partner).filter_by(uid=i['client_id']).first()
        if invoice is None:
            # First sighting: create and stage a new invoice Document.
            invoice = Document(uid=i['id'], account=account, tags=[typetags['invoice']])
            db.add(invoice)
        else:
            # Strip previously applied state tags so the append below
            # does not accumulate stale states across syncs.
            u = list(set(invoice.tags).difference(set(tags.values())))
            if u:
                invoice.tags = u
        invoice.name = i['subject']
        invoice.value = i['amount']
        invoice.date = datetime.strptime(i['issued_at'], '%Y-%m-%d')
        invoice.updated_at = datetime.strptime(i['updated_at'], '%Y-%m-%dT%H:%M:%SZ')
        # Keep the raw payload for auditing/debugging.
        invoice.meta = json.dumps(i)
        invoice.partner = partner
        # Re-tag with the current state; overdue open invoices also get 'due'.
        invoice.tags.append(tags[i['state']])
        if i['state'] == 'open' and datetime.strptime(
                i['due_at'], '%Y-%m-%d') < datetime.now():
            invoice.tags.append(tags['due'])
def change_filename_test(self):
    """change_filename updates the item's displayed basename."""
    controller = DocumentListController()
    document = Document()
    document.path = "/path/to/othertest.tf"
    controller.add(document)
    item = controller.association.keys()[0]
    controller.change_filename(document, "/new/file/name.tf")
    # assertEqual: assertEquals is a deprecated alias (removed in Python 3.12).
    self.assertEqual("name.tf", item.text())
def test__get_revision_by_timestamp(self):
    """A revision is retrievable by its exact timestamp."""
    ts = datetime(2020, 1, 1, 1, 1, 1)
    rev = Revision(id=1, content='hello', timestamp=ts, document_id=5)
    doc = Document(id=5, title='blah', revisions=[rev])
    assert doc.get_revision_by_timestamp(ts) == rev
def remove_item_dict2_test(self):
    """Removing one document leaves the other one registered."""
    controller = DocumentListController()
    document = Document()
    document2 = Document()
    controller.add(document)
    controller.add(document2)
    controller.remove(document)
    # assertEqual: assertEquals is a deprecated alias (removed in Python 3.12).
    self.assertEqual(1, len(controller.association))
    self.assertTrue(document2 in controller.association.values())
    self.assertFalse(document in controller.association.values())
def from_dict(d):
    """
    Convert dict representation to MatchSet
    :param d: dict representation of a MatchSet
    :return: MatchSet
    """
    return MatchSet(
        alpha_doc=Document.from_json(d['alpha_doc']),
        beta_doc=Document.from_json(d['beta_doc']),
        matches=[Match.from_dict(entry) for entry in d['matches']],
    )
def test_replace_text_save_file(self):
    """save() overwrites an existing file's contents."""
    # 'with' blocks close both handles (the original leaked them).
    with open("test_file", "w") as text_file:
        text_file.write("this is only a test file")
    document = Document()
    document.text = "I changed the text"
    document.path = "test_file"
    document.save()
    with open("test_file", "r") as text_file:
        # assertEqual: assertEquals is a deprecated alias (removed in 3.12).
        self.assertEqual(document.text, text_file.read())
def setUp(self):
    """Build a Controller-backed cache and a three-document test collection.

    Removed the coll_address/doc_hash_1..3 uuid locals: they were computed
    but never used (the documents below carry literal hashes).
    """
    self.controller = Controller()
    self.cache = self.controller.cache
    self.address = 'ffafaf'
    self.test_collection_evil = Collection(
        title="Test multiple33333",
        description="This is a collection! with multiple docs222",
        address=self.address,
        btc="123456789",
        keywords=[
            Keyword(name="Keyword A", id=1199),
            Keyword(name="Keyword c", id=1214),
        ],
        documents=[
            Document(
                description="Test document Z",
                hash="zzzzzzzz",
                title="Test Z",
                accesses=0,
                filename="joe.txt",
                collection_address="BM-2cSrapXpgDTFD8AyDmU1BGifNkB2Z6X9k8"
            ),
            Document(
                description="Test document B",
                hash='gdssgsdg',
                title="Test B",
                accesses=3,
                filename="gile.txt",
                collection_address="BM-2cSrapXpgDTFD8AyDmU1BGifNkB2Z6X9k8"
            ),
            Document(
                description="Test document Bddd",
                hash='afff',
                title="Test B",
                accesses=3,
                filename="gile.txt",
                collection_address="BM-2cSrapXpgDTFD8AyDmU1BGifNkB2Z6X9k8"
            ),
        ],
        creation_date=datetime.datetime.now(),
        oldest_date=datetime.datetime.now(),
        latest_broadcast_date=datetime.datetime.now(),
        latest_btc_tx="btctx1",
        oldest_btc_tx="btctx12",
        accesses=2,
        votes=3,
        votes_last_checked=datetime.datetime.now())
def process_parallel_worker(a, output_dir, gap_length, match_length, b, comparator):
    """
    Worker for processing two files at a time in parallel
    """
    comparator_module = importlib.import_module(COMPARATOR_PATH.format(comparator))
    worker = processor.Processor(output_dir=output_dir,
                                 comparator=comparator_module,
                                 gap_length=gap_length,
                                 match_length=match_length,
                                 percentage_match_length=None)
    worker.process(alpha_document=Document.from_json(a),
                   beta_document=Document.from_json(b))
def generate_documents(self, number) -> List[Document]:
    """Return 2*number Documents with sequential ids: the first *number*
    are non-relevant (0), the remaining *number* are relevant (1)."""
    documents = []
    next_id = 1
    # 0 == Relevance.NOT_RELEVANT, 1 == Relevance.RELEVANT
    for relevance in (0, 1):
        for _ in range(number):
            documents.append(Document(next_id, relevance))
            next_id += 1
    return documents
def post(self):
    """Persist edited HTML, CSS and display name for an existing document."""
    self.response.write('<html><body>')
    document_name = self.request.get('documentName')
    query = Document.query(ancestor=Document.getkey(document_name))
    document = query.fetch()[0]
    document.htmlcontent = self.request.get('htmlcontent')
    document.csscontent = self.request.get('csscontent')
    document.name = self.request.get('humanname')
    document.put()
def _build_docs_keywords(self, payload, collection):
    """
    Attach Keyword and Document objects from an FJ Message payload to
    *collection*, reusing cached rows when they already exist.

    :param payload: The payload of the FJ Message including the documents
        and keywords
    :param collection: collection the keyword/document objects are
        appended to
    """
    for key in payload["keywords"]:
        cached_keyword = self.cache.get_keyword_by_id(key["id"])
        if cached_keyword is None:
            cached_keyword = Keyword(name=key["name"])
        collection.keywords.append(cached_keyword)
    for doc in payload["documents"]:
        cached_doc = self.cache.get_document_by_hash(doc["hash"])
        if cached_doc is None:
            cached_doc = Document(collection_address=doc["address"],
                                  description=doc["description"],
                                  hash=doc["hash"],
                                  title=doc["title"],
                                  filename=doc["filename"],
                                  accesses=doc["accesses"])
        collection.documents.append(cached_doc)
def add_collection():
    """Create a one-document test Collection, cache it, hash it, commit,
    and return it.

    Fixes: coll_address and doc_hash_1 were computed but never used for
    their named purpose — the address/hash fields got fresh uuids instead,
    and doc_hash_1 was misassigned to collection_address (an address field,
    not a hash). doc_hash_2 was dead and is removed.
    """
    global our_cache
    coll_address = str(uuid.uuid1())
    doc_hash_1 = str(uuid.uuid1())
    coll = Collection(
        title="Test",
        description="This is a collection!",
        address=coll_address,
        btc=str(uuid.uuid1()),
        keywords=[],
        documents=[
            Document(
                collection_address=coll_address,
                description="Test document A",
                hash=doc_hash_1,
                title="Test A",
            ),
        ],
        creation_date=datetime.datetime.now(),
        oldest_date=datetime.datetime.now(),
        latest_broadcast_date=datetime.datetime.now(),
    )
    our_cache.insert_new_collection(coll)
    collections.update_hash(coll)
    our_cache.session.commit()
    return coll
def search(self, query) -> Documents:
    """Run a boosted multi_match query against the index and return the
    results as Documents, skipping excluded titles/categories/texts."""
    boosted_fields = ['title^' + str(ELASTIC_TITLE_BOOST),
                      'text^' + str(ELASTIC_TEXT_BOOST)]
    request = Search(using=self.client, index=INDEX_NAME) \
        .query("multi_match", query=query, fields=boosted_fields)
    response = request.execute()
    table = PrettyTable(['Index', 'Title', 'Score', 'Popularity'])
    docs = Documents()
    skip_count = 0
    for idx, doc in enumerate(response):
        excluded = (any(excl in doc.title for excl in TITLE_EXCLUDES)
                    or any(excl in doc.category for excl in CAT_EXCL)
                    or REFER_TEXT in doc.text)
        if excluded:
            skip_count += 1
            continue
        # Round-trip title/text through utf-8.
        doc.title = str.encode(doc.title, encoding='utf-8').decode(encoding='utf-8')
        doc.text = str.encode(doc.text, encoding='utf-8').decode(encoding='utf-8')
        docs.add(Document(doc.title, doc.text, doc.meta.score, idx))
        table.add_row([idx, doc.title,
                       '{0:.2f}'.format(doc.meta.score), doc.popularity_score])
    Logger.info('Elastic result:\n' + str(table))
    Logger.info(str(skip_count) + ' elastic results were skipped')
    return docs
def get_documents():
    """Return a paginated, ordered listing of documents."""
    order = request.args.get('order', 'desc')
    page = int(request.args.get('page', 1))
    purchases = Document.get_by_page(order, page)
    return response(documents_schema.dump(purchases))
def put_document(file_path, collection_address, title, description):
    """
    Insert a document into the local cache with associated information and
    upload the document to the freenet network.
    :param file_path: the path of the file to upload
    :param collection_address: the collection address associated with the document
    :param title: the title of the document being uploaded
    :param description: the description of the document being uploaded
    """
    file_name = os.path.basename(file_path)
    # 'with' closes the handle deterministically (was a bare open().read() leak).
    with open(file_path) as source:
        contents = source.read()
    freeCon = FreenetConnection()
    uri = freeCon.put(contents)
    # Only the extension is needed; the stored name is the freenet URI + ext.
    extension = os.path.splitext(file_name)[1]
    new_file_name = uri + extension
    shutil.copy(file_path,
                os.path.expanduser(config.DOCUMENT_DIRECTORY_PATH) + new_file_name)
    document = Document(
        collection_address=collection_address,
        description=description,
        hash=uri,
        title=title,
        filename=new_file_name,
        accesses=0
    )
    cache.insert_new_document(document)
    collection = cache.get_collection_with_address(collection_address)
    collections.update_hash(collection)
    print ("Inserted " + file_path + " successfully with URI " + uri)
    print ("Allow up to 10 minutes for file to propogate on the freenet network")
def convert_to_objects(a_paths, corpus, encoding, train_size):
    """Parse up to *train_size* annotation files into Document objects.

    :param a_paths: annotation file paths
    :param corpus: corpus identifier ('MADE-1.0' or 'corpus_release' style)
    :param encoding: text encoding passed to the parsers
    :param train_size: number of leading paths to convert
    :raises ValueError: for an unrecognized corpus (was an implicit
        NameError when neither branch matched)
    """
    docs = []
    for path in a_paths[:train_size]:
        if 'MADE-1.0' in corpus:
            e_list, r_list = parse_xml(path, encoding)
            text_path = path.replace('annotations', 'corpus').replace('.bioc.xml', '')
        elif 'corpus_release' in corpus:
            e_list, r_list = parse_brat(path, encoding)
            text_path = path.replace('ann', 'txt')
        else:
            raise ValueError('Unknown corpus: {!r}'.format(corpus))
        # Augment the annotated relations with generated negative examples.
        fictive_relations = get_fictive_relations(e_list, r_list, text_path, encoding)
        docs.append(Document(entities=e_list,
                             references=r_list + fictive_relations,
                             annotation_path=path,
                             text_path=text_path))
    return docs
def get_item_from_document_test(self):
    """get_item_from_document returns the item mapped to the document."""
    controller = DocumentListController()
    document = Document()
    controller.add(document)
    item = controller.get_item_from_document(document)
    # assertEqual: assertEquals is a deprecated alias (removed in Python 3.12).
    self.assertEqual(document, controller.association[item])
def remove_item_dict_test(self):
    """Removing the only document empties the association dict."""
    controller = DocumentListController()
    document = Document()
    controller.add(document)
    controller.remove(document)
    # assertEqual: assertEquals is a deprecated alias (removed in Python 3.12).
    self.assertEqual(0, len(controller.association))
def setUp(self):
    """Build a Controller, a two-document test collection, and a signature.

    Removed the unused coll_address and doc_hash_3 locals (computed but
    never referenced).
    """
    self.controller = Controller()
    self.address = self.controller.connection.create_address('Controller Test address', True)
    doc_hash_1 = str(uuid.uuid1())
    doc_hash_2 = str(uuid.uuid1())
    self.test_collection = Collection(
        title="Test",
        description="This is a collection!",
        address=self.address,
        btc="123456789",
        keywords=[
            Keyword(name="Keyword A"),
            Keyword(name="Keyword B"),
        ],
        documents=[
            Document(
                description="Test document A",
                hash=doc_hash_1,
                title="Test A",
                accesses=0,
                filename="joe.txt",
                collection_address="afgagahhsgh"
            ),
            Document(
                description="Test document B",
                hash=doc_hash_2,
                title="Test B",
                accesses=3,
                filename="gile.txt",
                collection_address="afgagasghhhss"
            ),
        ],
        creation_date=datetime.datetime.now(),
        oldest_date=datetime.datetime.now(),
        latest_broadcast_date=datetime.datetime.now(),
        latest_btc_tx="btctx1",
        oldest_btc_tx="btctx12",
        accesses=2,
        votes=3,
        votes_last_checked=datetime.datetime.now()
    )
    self.test_signature = Signature(pubkey='itsakey', address=self.address)
def get_documents_date():
    """Return COMPRA documents between the `from` and `to` query args
    (both inclusive; `to` covers the whole day).

    Fix: the defaults were datetime objects, but datetime.fromisoformat()
    only accepts strings — the endpoint crashed whenever a query arg was
    missing. Defaults are now today's ISO date string.
    """
    today = datetime.today().date().isoformat()
    date_from = datetime.fromisoformat(request.args.get('from', today))
    # +1 day makes the upper bound include the entire 'to' day.
    date_to = datetime.fromisoformat(request.args.get('to', today)) + timedelta(days=1)
    purchases = Document.get_by_dates(document_type='COMPRA',
                                      date_from=date_from, date_to=date_to)
    return response(documents_schema.dump(purchases))
def add_dict_test(self):
    """add() registers the document under a QStandardItem key."""
    controller = DocumentListController()
    document = Document()
    controller.add(document)
    # assertEqual: assertEquals is a deprecated alias (removed in Python 3.12).
    self.assertEqual(document, controller.association.values()[0])
    self.assertTrue(
        type(controller.association.keys()[0]) == QtGui.QStandardItem)
def get(self):
    """Render the editor page for the requested document, or a 404 page
    when no document matches."""
    documentName = self.request.get('documentName')
    matches = Document.query(ancestor=Document.getkey(documentName)).fetch()
    if not matches:
        self.response.write(TEMPLATE_404.render({}))
        return
    document = matches[0]
    template = JINJA_ENVIRONMENT.get_template('editor.html')
    self.response.write(template.render({
        'htmlcontent': document.htmlcontent,
        'csscontent': document.csscontent,
        'humanname': document.name,
        'id': documentName,
        'title': 'Editing ' + document.name
    }))
def test__get_latest_revision(self):
    """get_latest_revision returns the revision with the newest timestamp."""
    older = Revision(id=1, content='hello',
                     timestamp=datetime(2020, 1, 1, 1, 1, 1), document_id=5)
    newer = Revision(id=2, content='hello again',
                     timestamp=datetime(2020, 2, 1, 1, 1, 1), document_id=5)
    document = Document(id=5, title='blah', revisions=[older, newer])
    assert document.get_latest_revision() == newer
def remove_item_return_test(self):
    """remove() returns the item that was associated with the document."""
    controller = DocumentListController()
    document = Document()
    controller.add(document)
    document_item = controller.association.items()[0][0]
    removed_item = controller.remove(document)
    # assertEqual: assertEquals is a deprecated alias (removed in Python 3.12).
    self.assertEqual(document_item, removed_item)
def process(self):
    """
    Perform processing

    Creates raw and preprocessed versions of the input file as well as a
    json file representing the models.Document
    """
    start_time = time.time()
    name = path.get_name(self.file_name, extension=False)
    output_name = name + PREPROCESS_SUFFIX
    in_file = self.file_name
    out_file = os.path.join(self.output_dir, output_name)
    if file_ops.exists(out_file):
        # Already preprocessed
        return
    if in_file.endswith('.tei') or in_file.endswith('.xml'):
        # TEI/XML input: extract body text and metadata via the TEI reader.
        reader = TEIReader(in_file)
        raw_text, metadata = reader.read()
    else:
        # Plain-text input: no metadata available.
        raw_text = file_ops.read_utf8(in_file)
        metadata = {}
    # Persist the untouched text under output_dir/raw/.
    raw_file = os.path.join(self.output_dir, 'raw' + os.sep, name + PLAIN_SUFFIX)
    file_ops.write_utf8(raw_file, raw_text)
    # Persist the standardized text under output_dir/pre/.
    processed_text = self.standardizer.standardize(raw_text)
    pre_file = os.path.join(self.output_dir, 'pre' + os.sep, name + PLAIN_SUFFIX)
    file_ops.write_utf8(pre_file, processed_text)
    # Serialize a Document that links the original, raw and pre files.
    out_document = Document(file_name=self.file_name,
                            raw_file_name=raw_file,
                            pre_file_name=pre_file,
                            metadata=metadata)
    processed_dict = out_document.to_dict()
    file_ops.write_json_utf8(out_file, processed_dict)
    duration = time.time() - start_time
    self._log_duration(duration, self.file_name, len(raw_text))
def process_serial(args, alpha_files, beta_files):
    """
    Process on a single thread
    """
    comparator = importlib.import_module(COMPARATOR_PATH.format(args.comparator))
    pro = processor.Processor(output_dir=args.output_dir,
                              comparator=comparator,
                              gap_length=args.gap_length,
                              match_length=args.match_length,
                              percentage_match_length=None)
    compared = []
    for a, b in itertools.product(alpha_files, beta_files):
        pair = sorted([a, b])
        # Skip self-comparisons and pairs already processed (either order).
        if a == b or pair in compared:
            continue
        pro.process(alpha_document=Document.from_json(a),
                    beta_document=Document.from_json(b))
        compared.append(pair)
    return len(compared)
def test_different_root_hash(self):
    """Each document insertion produces a collection version with a new
    root hash."""
    specs = [
        ("Test document A", "asdfasdfa;sldkfja;sldkfja;dljkfa;ldf", "Test A"),
        ("Test document B", "fdasdfsdfsdfsdfsdfsdfsdfdfsdfsddfdfdf", "Test B"),
    ]
    for description, doc_hash, title in specs:
        self.cache.insert_new_document(Document(description=description,
                                                hash=doc_hash,
                                                collection_address="bm-first",
                                                title=title))
        collections.update_hash(self.collection1)
    versions = self.cache.get_versions_for_collection(
        self.collection1.address)
    self.assertTrue(versions[0].root_hash != versions[1].root_hash)
def test__get_revision_by_timestamp_expect_most_recent(self):
    """A timestamp between two revisions resolves to the most recent
    revision that is not after it."""
    first = Revision(id=1, content='hello',
                     timestamp=datetime(2020, 1, 1, 1, 1, 1), document_id=5)
    second = Revision(id=2, content='hello again',
                      timestamp=datetime(2020, 2, 1, 1, 1, 1), document_id=5)
    document = Document(id=5, title='blah', revisions=[first, second])
    probe = datetime(2020, 1, 29, 1, 1, 1)
    assert document.get_revision_by_timestamp(probe) == first
def get(self):
    """Render the read-only view page for a document, or a 404 page.

    Removed the unused htmlcontent/csscontent request reads — they were
    never referenced (the template uses the stored document's fields).
    """
    documentName = self.request.get('documentName')
    matches = Document.query(ancestor=Document.getkey(documentName)).fetch()
    if len(matches) == 0:
        self.response.write(TEMPLATE_404.render({}))
        return
    document = matches[0]
    template = JINJA_ENVIRONMENT.get_template('view.html')
    self.response.write(template.render({
        'htmlcontent' : document.htmlcontent,
        'csscontent' : document.csscontent,
        'editurl' : '/edit?' + urllib.urlencode({'documentName' : documentName}),
        'title' : document.name,
        'id' : documentName
    }))
def setUp(self):
    """Load two fixture documents, pair their passages into singlet pairs,
    and build the MatchSet under test."""
    self.passages_a = [chr(ord('a') + i) for i in xrange(10)]
    self.passages_b = [chr(ord('A') + i) for i in xrange(10)]
    self.file_a = 'models/test_data/match_set_test.json'
    self.document_a = Document.from_json(self.file_a)
    self.file_b = 'models/test_data/match_set_test2.json'
    self.document_b = Document.from_json(self.file_b)
    self.matches = []
    self.singlet_pairs = []
    for left, right in zip(self.passages_a, self.passages_b):
        self.singlet_pairs.append((MatchHalf(passage=left),
                                   MatchHalf(passage=right)))
    # Alpha/beta need to be actual documents, not names
    self.matches = Processor.singlet_pairs_to_matches(
        alpha=self.document_a,
        beta=self.document_b,
        singlet_pairs=self.singlet_pairs)
    self.match_set = MatchSet(alpha_doc=self.document_a,
                              beta_doc=self.document_b,
                              matches=self.matches)
def get(self):
    """Render the listing page of all documents with view/edit links."""
    entries = Document.query().fetch()
    listing = [{
        'name': d.documentName,
        'url': '/view?' + urllib.urlencode({'documentName': d.documentName}),
        'editurl': '/edit?' + urllib.urlencode({'documentName': d.documentName}),
        'humanname': d.name or '[No name]',
        'date': d.date,
    } for d in entries]
    template = JINJA_ENVIRONMENT.get_template('list.html')
    self.response.write(template.render({'list': listing}))
def test_read(self):
    """
    Test reading of TEI xml file
    """
    tei_file = self._get_test_file_name(TEI_ZHI)
    expected_body = tei_document.TEIDocument(tei_file).get_data()['body']
    read_body, read_metadata = reader.TEIReader(tei_file).read()
    self.assertEqual(expected_body, read_body)
    # The json fixture holds the expected body and metadata.
    reference = Document.from_json(self._get_test_file_name(JSON_ZHI))
    self.assertEqual(read_body, reference.raw_body)
    self.assertEqual(read_metadata, reference.metadata)
def test_smoke(self):
    """
    Smoke test - check that the preprocessor runs without exploding
    """
    pp = Preprocessor(file_name=self.file_name,
                      input_dir=self.input_dir,
                      output_dir=self.output_dir)
    pp.process()
    base = utilities.path.get_name(self.file_name, extension=False)
    for produced in os.listdir(self.output_dir):
        if base not in produced:
            continue
        doc = Document.from_json(os.path.join(self.output_dir, produced))
        self.assertNotEqual(doc.pre_file_name, self.file_name)
        self.assertEqual(doc.file_name, 'test_preprocessed/lorem.json')
def setUp(self):
    """Build the Document under test from the class-level fixture fields."""
    self.doc = Document(
        file_name=self.file_name,
        metadata=self.meta,
        pre_file_name=self.pre_file_name,
        raw_file_name=self.raw_file_name,
    )
def post(self):
    """Create a new document with default content and redirect to its editor."""
    documentName = Document.newname()
    new_doc = Document(parent=Document.getkey(documentName))
    new_doc.htmlcontent = constants.HTML_DEFAULT
    new_doc.csscontent = constants.CSS_DEFAULT
    new_doc.documentName = documentName
    new_doc.name = constants.DEFAULT_NAME
    new_doc.key = Document.getkey(documentName)
    new_doc.put()
    self.redirect('/edit?' + urllib.urlencode({'documentName': documentName}))
class DocumentTest(unittest.TestCase):
    """Tests for the Document model: cloning, dict conversion, json loading."""

    file_name = u'models/test_data/lorem.json'
    meta = {'title': u'test 稢綌', 'author': u'gorden 胇赲'}
    body = u'In id tristique orci. 痵痽 犵艿邔 疿疶砳 齸圞趲.'
    pre_file_name = file_name + '_PRE.json'
    raw_file_name = file_name

    def setUp(self):
        self.doc = Document(file_name=self.file_name,
                            metadata=self.meta,
                            pre_file_name=self.pre_file_name,
                            raw_file_name=self.raw_file_name)

    def test_clone(self):
        """Cloning copies every field; mutating any field breaks equality."""
        copy = self.doc.clone()
        for attr in ('file_name', 'pre_file_name', 'raw_file_name',
                     'metadata', 'raw_body'):
            self.assertEqual(getattr(copy, attr), getattr(self.doc, attr))
        self.assertEqual(self.doc, copy)
        # assertFalse rather than assertNotEqual so __eq__ is what runs.
        copy.file_name = u'nope'
        self.assertFalse(self.doc == copy)
        copy.file_name = self.doc.file_name
        copy.metadata = None
        self.assertFalse(self.doc == copy)
        copy.metadata = self.doc.metadata
        copy.raw_file_name = ''
        self.assertFalse(self.doc == copy)
        copy.raw_file_name = self.doc.raw_file_name
        copy.pre_file_name = ''
        self.assertFalse(self.doc == copy)

    def test_to_dict(self):
        """to_dict() exposes the fields used for json serialization."""
        as_dict = self.doc.to_dict()
        self.assertEqual(as_dict['file_name'], self.doc.file_name)
        self.assertEqual(as_dict['metadata'], self.doc.metadata)
        self.assertEqual(as_dict['pre_file_name'], self.doc.pre_file_name)
        # TODO check raw

    def test_open(self):
        """from_json raises InvalidDocumentException on invalid inputs."""
        self.assertRaises(InvalidDocumentException, Document.from_json,
                          'models/test_data/invalid.json')
        self.assertRaises(InvalidDocumentException, Document.from_json,
                          'models/test_data/invalid.txt')