def create_dataset(database_id, dataset, dataset_dir, meta_types_db, **kwargs):
    """Get or create the Dataset database entry and its metadata entries.

    database_id -- the dict key specifying the database in django
    dataset -- an AbstractDataset type
    dataset_dir -- the directory that the dataset can use to store documents
                   and analysis directories to store intermediate results
    meta_types_db -- what is returned by the get_all_metadata_types method in
                     the metadata.utilities module

    Keyword Arguments:
    public -- make this dataset public (anybody can explore it);
              defaults to False
    public_documents -- make the document text publicly available;
                        defaults to False

    An entry that already exists (same name and dataset_dir) is returned
    untouched. A newly created entry gets its visibility flags set, is saved,
    and then has its metadata types and metadata values created.

    Return the Dataset django database object.
    """
    is_public = kwargs.get('public', False)
    has_public_documents = kwargs.get('public_documents', False)

    with transaction.atomic(using=database_id):
        dataset_manager = Dataset.objects.using(database_id)
        dataset_db, is_new = dataset_manager.get_or_create(
            name=dataset.name,
            dataset_dir=dataset_dir,
        )
        if not is_new:
            return dataset_db

        # A fresh dataset starts out hidden.
        dataset_db.public = is_public
        dataset_db.public_documents = has_public_documents
        dataset_db.visible = False
        dataset_db.save()

        # NOTE(review): metadata creation is treated as part of the
        # "newly created" branch, mirroring create_analysis -- confirm.
        dataset_metadata_types = dataset.metadata_types
        create_metadata_types(database_id, dataset_metadata_types,
                              meta_types_db)
        create_metadata(
            database_id,
            [dataset_db],
            DatasetMetadataValue,
            'dataset',
            dataset_metadata_types,
            meta_types_db,
            [dataset.metadata],
        )
    return dataset_db
def create_analysis(database_id, dataset_db, analysis, meta_types_db):
    """Get or create the Analysis database entry and its metadata entries.

    database_id -- the dict key specifying the database in django
    dataset_db -- the Dataset django database object
    analysis -- an AbstractAnalysis object
    meta_types_db -- what is returned by the get_all_metadata_types method in
                     the metadata.utilities module

    An entry that already exists for this dataset and analysis name is
    returned untouched; metadata types and metadata values are only created
    alongside a newly created entry.

    Return the Analysis django database object.
    """
    with transaction.atomic(using=database_id):
        analysis_manager = Analysis.objects.using(database_id)
        analysis_db, is_new = analysis_manager.get_or_create(
            dataset=dataset_db,
            name=analysis.name,
        )
        if not is_new:
            return analysis_db

        analysis_metadata_types = analysis.metadata_types
        create_metadata_types(database_id, analysis_metadata_types,
                              meta_types_db)
        create_metadata(
            database_id,
            [analysis_db],
            AnalysisMetadataValue,
            'analysis',
            analysis_metadata_types,
            meta_types_db,
            [analysis.metadata],
        )
    return analysis_db
def bulk_create_documents(documents, metadata):
    """Bulk-insert a batch of documents and their metadata, then empty both lists.

    documents -- a list of unsaved Document objects; the first and last
                 elements' index attributes are used as the inclusive range
                 for reading the batch back from the database
    metadata -- a list of metadata passed to create_metadata together with
                the re-read documents
                (presumably one entry per document, in the same order --
                TODO confirm against create_metadata)

    The names database_id, dataset_db, document_metadata_types and
    meta_types_db are not parameters; they must be provided by the
    surrounding scope.

    Does nothing when documents is empty. Otherwise both lists are cleared
    in place after the transaction block has exited without error, so the
    caller can reuse them for the next batch.
    """
    if not documents:
        return

    with transaction.atomic(using=database_id):
        index_range = (documents[0].index, documents[-1].index)

        # create document entries
        Document.objects.using(database_id).bulk_create(documents)

        # retrieve documents from database since bulk_create doesn't return
        # a primary key
        documents_db = Document.objects.using(database_id).filter(
            dataset=dataset_db,
            index__range=index_range,
        ).order_by('index')

        # create metadata entries
        create_metadata(
            database_id,
            documents_db,
            DocumentMetadataValue,
            'document',
            document_metadata_types,
            meta_types_db,
            metadata,
        )

    # Clear in place (not rebind) so the caller's list objects are emptied.
    del documents[:]
    del metadata[:]