# NOTE: these are excerpts from several modules; standard-library/third-party
# imports are gathered here, while project-internal names (Submission, DataFile,
# RemoteDataFile, Profile, DAComponent, CgCoreSchemas, ghlper, d_utils,
# data_utils, lg, Loglvl, Logtype, REPOSITORIES, RESOLVER, BASE_DIR, size,
# alternative, Connection, Dataset, ThreadLocal, OperationFailedError) are
# assumed to be imported from the host project.
import json
import os
import subprocess
import time
import uuid
from collections import namedtuple
from datetime import datetime

import jsonpickle
import pandas as pd
import pexpect
import requests
from bson import ObjectId
from django.http import HttpResponse


def get_upload_information(request):
    submission_id = request.GET.get('submission_id')

    # tonietuk's intercept starts
    if not submission_id:
        data = {'found': False}
        return HttpResponse(json.dumps(data))
    # tonietuk's intercept ends

    # get submission collection and check status
    sub = Submission().get_record(submission_id)

    if sub:
        if not sub['complete'] or sub['complete'] == 'false':
            rem = RemoteDataFile().get_by_sub_id(submission_id)
            if rem:
                speeds = rem['transfer_rate'][-100:]
                complete = rem['pct_completed']
                data = {'speeds': speeds, 'complete': complete, 'finished': False, 'found': True}
                return HttpResponse(json.dumps(data))
        else:
            # elapsed = str(parser.parse(sub['completed_on']) - parser.parse(sub['commenced_on']))
            # data = {'upload_time': str(elapsed), 'completed_on': sub['completed_on'],
            #         'article_id': sub.get('article_id'), 'finished': True, 'found': True}
            data = {'sub_id': str(sub['_id']),
                    'status': sub['status'],
                    'accessions': sub['accessions'],
                    'repo': sub['repository'],
                    'completed_on': sub['completed_on'].strftime("%Y-%m-%d %H:%M:%S"),
                    'article_id': sub.get('article_id'),
                    'finished': True,
                    'found': True}
            return HttpResponse(json.dumps(data))

    data = {'found': False}
    return HttpResponse(json.dumps(data))
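# Illustrative only (not part of the view above): the JSON shapes a polling
# client can expect from get_upload_information. Field values are hypothetical.
#
# While a transfer is in progress:
#   {"speeds": ["700Mb/s", ...], "complete": "42", "finished": false, "found": true}
#
# Once the submission has completed:
#   {"sub_id": "5d1a...", "status": "...", "accessions": {...}, "repo": "ena",
#    "completed_on": "2019-08-30 12:00:00", "article_id": null,
#    "finished": true, "found": true}
#
# If the submission id is missing or unknown:
#   {"found": false}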
def resolve_submission_id(request, submission_id):
    sub = Submission().get_record(submission_id)

    # get all file metadata
    output = dict()
    files = list()
    for f in sub.get("bundle", list()):
        file = DataFile().get_record(f)
        files.append(file["description"]["attributes"])
    output["files"] = files
    output["accessions"] = sub["accessions"]
    output["metadata"] = {}
    output["metadata"]["dc"] = sub["meta"]["fields"]
    return HttpResponse(json.dumps(output))  # was 'j.dumps' - an undefined name
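# A minimal sketch of how these two views might be exposed in a Django URLconf.
# The module path and URL patterns are assumptions for illustration, not taken
# from the project:
#
#   from django.urls import path
#   import web.views as views  # hypothetical module containing the views above
#
#   urlpatterns = [
#       path('upload_information/', views.get_upload_information,
#            name='get_upload_information'),
#       path('resolve_submission/<submission_id>/', views.resolve_submission_id,
#            name='resolve_submission_id'),
#   ]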
def _do_file_transfer(self):
    submission_record = Submission().get_record(self.submission_id)

    # do we have files to be uploaded?
    bundle_df = pd.DataFrame(submission_record.get("bundle_meta", list()))

    if len(bundle_df) == 0:
        # insufficient information to proceed - no bundle meta
        return

    pending_df = bundle_df[bundle_df['upload_status'] == False]

    if len(pending_df) > 0:
        path2library = os.path.join(BASE_DIR, REPOSITORIES['ASPERA']['resource_path'])
        user_name = REPOSITORIES['ASPERA']['user_token']
        password = REPOSITORIES['ASPERA']['password']

        # compose remote file directory
        remote_path = d_utils.get_ena_remote_path(self.submission_id)

        self._do_aspera_transfer(user_name=user_name,
                                 password=password,
                                 remote_path=remote_path,
                                 file_path=list(pending_df['file_path']),
                                 path2library=path2library)
    else:
        # no files to be uploaded
        transfer_fields = dict()
        transfer_fields["transfer_status"] = "completed"
        transfer_fields["pct_completed"] = '100'
        transfer_fields["current_time"] = datetime.now().strftime("%d-%m-%Y %H:%M:%S")

        # save collected metadata to the transfer record
        RemoteDataFile().update_transfer(self.transfer_token, transfer_fields)
        self.context["ena_status"] = "files_transferred"

    return
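# Sketch of the "bundle_meta" records that _do_file_transfer() filters with
# pandas. The field names come from the code in this file; the values shown
# are hypothetical:
#
#   bundle_meta = [
#       {"file_id": "5d1a...", "file_path": "/data/run1.fastq.gz", "upload_status": False},
#       {"file_id": "5d1b...", "file_path": "/data/run2.fastq.gz", "upload_status": True},
#   ]
#
# pd.DataFrame(bundle_meta) then lets the method select only the rows whose
# upload_status is False, i.e. the files still awaiting transfer.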
def get_upload_information(request):
    context = dict()
    ids = json.loads(request.POST.get("ids", "[]"))

    sub_info_list = list()
    submission_queue_handle = ghlper.get_submission_queue_handle()

    for id in ids:
        # get submission record and check submission status
        try:
            sub = Submission().get_record(id)
        except:
            sub = dict()

        if not sub:
            continue

        sub_info_dict = dict()
        sub_info_dict["submission_id"] = id
        sub_info_dict["enable_submit_button"] = True

        repo = sub.get("repository", str()).lower()

        if repo in ["cg_core", "dataverse", "dspace", "ckan"]:
            # 'and' binds tighter than 'or': parenthesise the alternatives so a
            # record without a "meta" key cannot fall through to the 'or' branch
            if "meta" in sub and ("fields" in sub["meta"] or "identifier" in sub["meta"]):
                pass
            else:
                sub_info_dict["enable_submit_button"] = False

        if str(sub.get("complete", False)).lower() == 'true':
            # submission has finished
            sub_info_dict["submission_status"] = True
            sub_info_dict["completed_on"] = sub.get('completed_on', str()).strftime(
                '%d %b, %Y, %H:%M') if sub.get('completed_on', str()) else 'unavailable'

            try:
                sub_info_dict["article_id"] = sub['article_id']
            except:
                sub_info_dict["article_id"] = "unavailable"

            # get study embargo info
            if repo == "ena":
                # get study accession
                prj = sub.get('accessions', dict()).get('project', [{}])
                status = prj[0].get("status", "Unknown")
                release_date = prj[0].get("release_date", str())

                if status.upper() == "PRIVATE":
                    sub_info_dict["release_status"] = "PRIVATE"
                    sub_info_dict["release_date"] = release_date

                    if len(release_date) >= 10:  # e.g. '2019-08-30'
                        try:
                            datetime_object = datetime.strptime(release_date[:10], '%Y-%m-%d')
                            sub_info_dict["release_date"] = time.strftime(
                                '%a, %d %b %Y %H:%M', datetime_object.timetuple())
                        except:
                            pass

                    sub_info_dict["release_message"] = \
                        "<div>All objects in this submission are set to " \
                        "private (confidential) status.</div>" \
                        "<div style='margin-top:10px;'>The release date is set for " + \
                        sub_info_dict["release_date"] + \
                        ".</div><div style='margin-top:10px;'>" \
                        "To release this study to the public, " \
                        "click the release study button.</div>"
                elif status.upper() == "PUBLIC":
                    sub_info_dict["release_status"] = "PUBLIC"
                    sub_info_dict["study_view_url"] = \
                        "https://www.ebi.ac.uk/ena/data/view/" + prj[0].get("accession", str())
                    sub_info_dict["release_message"] = \
                        "<div>All objects in this submission are set to public status.</div> " \
                        "<div style='margin-top:10px;'>To view this study " \
                        "on the ENA browser (opens in a new browser tab), " \
                        "click the view on ENA button.</div>"
                else:
                    sub_info_dict["release_status"] = "Unknown"
                    sub_info_dict["release_message"] = \
                        "<div>The embargo status of this study is unknown.</div>" \
                        "<div>For more details, please contact your administrator. " \
                        "Alternatively, you can try searching for the study on the " \
                        "ENA browser to verify its status.</div>"
        else:
            sub_info_dict["is_active_submission"] = False

            if repo == "ena":  # this will be extended to other repositories/submission end-points
                submission_in_queue = submission_queue_handle.find_one(
                    {"submission_id": sub_info_dict["submission_id"]})

                if submission_in_queue:
                    # submission is still queued: flag it as active so it is not resubmitted
                    sub_info_dict["is_active_submission"] = True

            # get status report
            status = sub.get("transcript", dict()).get('status', dict())
            if status:
                # status types are either 'info' or 'error'
                sub_info_dict["submission_report"] = dict(
                    type=status.get('type', str()),
                    message=status.get('message', str()))

        # report on submitted datafiles - ENA for now...
        if repo == "ena":
            run_accessions = sub.get('accessions', dict()).get('run', list())
            submitted_files = [x for y in run_accessions for x in y.get('datafiles', list())]

            if submitted_files:
                sub_info_dict["submitted_files"] = submitted_files

        sub_info_list.append(sub_info_dict)

    context["submission_information"] = sub_info_list
    out = jsonpickle.encode(context)
    return HttpResponse(out, content_type='application/json')
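# Illustrative request/response round trip for the view above (all values are
# hypothetical). The client POSTs a JSON-encoded list of submission ids:
#
#   ids=["5d1a..."]
#
# and receives a jsonpickle-encoded context of the form:
#
#   {"submission_information": [
#       {"submission_id": "5d1a...", "enable_submit_button": true,
#        "submission_status": true, "completed_on": "30 Aug, 2019, 12:00",
#        "article_id": "unavailable", "release_status": "PRIVATE",
#        "release_date": "Fri, 30 Aug 2019 00:00",
#        "release_message": "<div>...</div>",
#        "submitted_files": ["run1.fastq.gz"]}]}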
class DataverseSubmit(object):
    def __init__(self, submission_id=str()):
        self.submission_id = submission_id
        self.submission_record = dict()
        self.file_path = str()
        self.host = str()
        self.api_token = str()
        self.headers = dict()

        if self.submission_id:
            # get submission record
            self.submission_record = Submission().get_record(self.submission_id)

            # set up submission parameters...

            # submission path
            dir = os.path.join(os.path.dirname(__file__), "data")
            self.file_path = os.path.join(os.path.join(dir, self.submission_id), 'dataverse')

            # dataverse host
            self.host = self.submission_record.get("destination_repo", dict()).get("url", str())

            # api_token
            self.api_token = self.submission_record.get("destination_repo", dict()).get("apikey", str())

            # headers
            self.headers = {'X-Dataverse-key': self.api_token}

    def submit(self):
        """
        function controls the submission of objects to a Dataverse
        :return:
        """
        sub_meta = self.submission_record.get("meta", dict())

        # if dataset id in submission meta, we are adding to an existing dataset,
        # otherwise we are creating a new dataset
        if "fields" in sub_meta:
            return self._create_and_add_to_dataverse()
        elif ('entity_id' in sub_meta and 'alias' in sub_meta) or \
                ('dataverse_alias' in sub_meta and 'doi' in sub_meta):
            return self._add_to_dataverse()

    def truncate_url(self, url):
        if url.startswith('https://'):
            url = url[8:]
        elif url.startswith('http://'):
            url = url[7:]
        return url

    @staticmethod
    def get_format_doi(doi):
        """
        function formats passed doi for api calls to dataverse
        :param doi:
        :return:
        """
        doi_prefixes = ["https://doi.org/", "http://doi.org/", "https://", "http://", "doi.org/"]

        for dp in doi_prefixes:
            if dp in doi:
                # strip the matched prefix; the original split on a hardcoded
                # "https://doi.org/" regardless of which prefix actually matched
                doi = "doi:" + doi.split(dp)[-1]
                break

        return doi

    def clear_submission_metadata(self):
        Submission().clear_submission_metadata(self.submission_id)

    def get_dataverse_details(self, dataverse_alias):
        """
        function retrieves dataverse details given its alias
        :param dataverse_alias:
        :return:
        """
        response_data = dict()
        url = self.host + "/api/dataverses/" + dataverse_alias

        try:
            response = requests.get(url)
            if str(response.status_code).lower() in ("ok", "200"):
                response_data = response.json().get("data", dict())
        except Exception as e:
            exception_message = "Error retrieving dataverse details " + url + " : " + str(e)
            self.report_error(exception_message)

        return response_data

    def get_dataset_details(self, doi):
        """
        function retrieves dataset details given its doi
        :param doi:
        :return:
        """
        response_data = dict()

        # retrieve dataset details given its doi
        headers = {'X-Dataverse-key': self.api_token}

        # get formatted doi
        doi = self.get_format_doi(doi)
        params = (('persistentId', doi),)
        url = self.host + "/api/datasets/:persistentId/"

        try:
            response = requests.get(url, headers=headers, params=params)
            if str(response.status_code).lower() in ("ok", "200"):
                response_data = response.json().get("data", dict())
        except Exception as e:
            exception_message = "Error retrieving dataset details " + url + " : " + str(e)
            self.report_error(exception_message)

        return response_data

    def _add_to_dataverse(self):
        """
        function adds datafiles to a dataset
        :return:
        """
        sub = self.submission_record

        # check for dataverse alias
        alias = sub.get("meta", dict()).get("dataverse_alias", str()) or \
            sub.get("meta", dict()).get("alias", str())
        if not alias:
            return {"status": 404, "message": "\n Error getting dataverse"}

        # check for dataset doi
        doi = sub.get("meta", dict()).get("doi", str())
        if not doi:
            return {"status": 404, "message": "\n Error getting dataset"}

        # add file to dataset
        result = self.send_files_curl(persistent_id=doi)

        if result is True:
            # store accessions and clear submission
            dv_response_data = self.get_dataverse_details(alias)
            ds_response_data = self.get_dataset_details(doi)

            dataset_title = [x["value"] for x in
                             ds_response_data.get("latestVersion", dict()).get(
                                 "metadataBlocks", dict()).get("citation", dict()).get("fields", list())
                             if x.get("typeName", str()) == "title"]

            acc = dict()
            acc['dataset_id'] = ds_response_data.get("id", str())
            acc['dataset_doi'] = doi
            acc['dataverse_alias'] = alias
            acc['dataverse_title'] = dv_response_data.get("name", "N/A")
            acc['dataset_title'] = "N/A"

            if dataset_title:
                if isinstance(dataset_title, list):
                    acc['dataset_title'] = dataset_title[0]
                elif isinstance(dataset_title, str):
                    acc['dataset_title'] = dataset_title

            sub['accessions'] = acc
            sub['target_id'] = sub.pop('_id', self.submission_id)
            Submission().save_record(dict(), **sub)

            self.clear_submission_metadata()

        return result

    def _create_and_add_to_dataverse(self):
        """
        creates a Dataset in a Dataverse
        :return:
        """
        # proceed with the creation of a dataset iff no accessions are recorded
        dataset_persistent_id = self.submission_record.get(
            "accessions", dict()).get("dataset_doi", str())

        # there's an existing submission associated with this submission
        if dataset_persistent_id:
            return self.post_dataset_creation(persistent_id=dataset_persistent_id)

        # get dataverse alias
        dataverse_alias = self.submission_record.get("meta", dict()).get("alias", str())

        if not dataverse_alias:
            exception_message = 'Dataverse alias not found!'
            self.report_error(exception_message)
            return exception_message
            # raise OperationFailedError(exception_message)

        # convert to Dataset metadata
        metadata_file_path = self.do_conversion()

        # make API call
        api_call = 'curl -H "X-Dataverse-key: {api_token}" -X POST ' \
                   '{server_url}/api/dataverses/{dv_alias}/datasets --upload-file {dataset_json}'
        api_call = api_call.format(api_token=self.api_token,
                                   server_url=self.host,
                                   dv_alias=dataverse_alias,
                                   dataset_json=metadata_file_path)

        # retrieve call result
        try:
            receipt = subprocess.check_output(api_call, shell=True)
            receipt = json.loads(receipt.decode('utf-8'))
        except Exception as e:
            exception_message = 'API call error: ' + str(e)
            self.report_error(exception_message)
            return exception_message
            # raise OperationFailedError(exception_message)
        else:
            if receipt.get("status", str()).lower() in ("ok", "200"):
                receipt = receipt.get("data", dict())
            else:
                exception_message = 'The Dataset could not be created. ' + str(receipt)
                self.report_error(exception_message)
                return exception_message
                # raise OperationFailedError(exception_message)

        dataset_persistent_id = receipt.get("persistentId", str())
        dataset_id = receipt.get("id", str())

        # retrieve and store accessions to db
        sub = self.submission_record

        acc = dict()
        acc['dataset_id'] = dataset_id
        acc['dataset_doi'] = dataset_persistent_id
        acc['dataverse_alias'] = dataverse_alias
        acc['dataset_title'] = "N/A"

        # retrieve dataverse details given its alias
        dv_response_data = self.get_dataverse_details(dataverse_alias)
        acc['dataverse_title'] = dv_response_data.get("name", "N/A")

        # retrieve dataset details given its doi
        ds_response_data = self.get_dataset_details(dataset_persistent_id)
        dataset_title = [x["value"] for x in
                         ds_response_data.get("latestVersion", dict()).get(
                             "metadataBlocks", dict()).get("citation", dict()).get("fields", list())
                         if x.get("typeName", str()) == "title"]

        if dataset_title:
            if isinstance(dataset_title, list):
                acc['dataset_title'] = dataset_title[0]
            elif isinstance(dataset_title, str):
                acc['dataset_title'] = dataset_title

        # update submission record with accessions
        sub['accessions'] = acc
        sub['target_id'] = sub.pop('_id', self.submission_id)
        Submission().save_record(dict(), **sub)

        # do post creation tasks
        return self.post_dataset_creation(persistent_id=dataset_persistent_id)

    def post_dataset_creation(self, persistent_id=str()):
        """
        upon completion of dataset creation, perform this task(s)
        :param persistent_id:
        :return:
        """
        # add file to dataset
        result = self.send_files_curl(persistent_id=persistent_id)

        if result is True:
            self.clear_submission_metadata()

        return result

    def send_files(self, sub, ds):
        for id in sub['bundle']:
            file = DataFile().get_record(ObjectId(id))
            file_location = file['file_location']
            file_name = file['name']
            with open(file_location, 'rb') as f:
                contents = f.read()
                ds.upload_file(file_name, contents, zip_files=False)

    def send_files_curl(self, persistent_id=str()):
        """
        function uses curl to add datafiles to a Dataverse dataset, given its
        persistent_id (DOI)
        :param persistent_id:
        :return:
        """
        # get submission record
        sub = self.submission_record

        # get formatted doi
        persistent_id = self.get_format_doi(persistent_id)

        datafiles = sub.get("bundle_meta", list())

        # get all pending files
        pending_files = [x for x in datafiles if x.get("upload_status", False) is False]

        if not pending_files:
            # update status and exit method
            if sub.get("complete", False) is False:
                sub['complete'] = True
                sub['completed_on'] = datetime.now()
                sub['target_id'] = sub.pop('_id', self.submission_id)
                Submission().save_record(dict(), **sub)
            return True

        # compose api call
        api_call = 'curl -H "X-Dataverse-key:{api_token}" -X ' \
                   'POST -F \'file=@{data_file}\' -F \'jsonData={{"description":"Datafile","categories":["Data"], ' \
                   '"restrict":"true"}}\' "{server_url}/api/datasets/:persistentId/add?persistentId={persistent_id}"'
        api_call = api_call.format(api_token=self.api_token,
                                   server_url=self.host,
                                   persistent_id=persistent_id,
                                   data_file='mock-datafile')

        upload_error = ""

        for df in pending_files:
            upload_string = api_call.replace("mock-datafile", df.get("file_path", str()))

            try:
                receipt = subprocess.check_output(upload_string, shell=True)
                receipt = json.loads(receipt.decode('utf-8'))
            except Exception as e:
                exception_message = "Error uploading file " + df.get("file_path", str()) + " : " + str(e)
                self.report_error(exception_message)
                upload_error = upload_error + "\n" + exception_message
            else:
                if receipt.get("status", str()).lower() in ("ok", "200"):
df["upload_status"] = True else: exception_message = "Error uploading file " + df.get( "file_path", str()) + " : " + str(receipt) self.report_error(exception_message) upload_error = upload_error + "\n" + exception_message # if all files uploaded, mark submission as complete pending_files = [ x for x in pending_files if x.get("upload_status", False) is False ] if pending_files: return {"status": 404, "message": upload_error} sub['complete'] = True sub['completed_on'] = datetime.now() sub['target_id'] = sub.pop('_id', self.submission_id) Submission().save_record(dict(), **sub) return True def _get_connection(self): dvurl = self.host['url'] apikey = self.host['apikey'] dvurl = self.truncate_url(dvurl) c = Connection(dvurl, apikey) return c def _get_dataverse(self, profile_id): # create new dataverse if none already exists u = data_utils.get_current_user() # create new dataverse if none exists already dv_details = Profile().check_for_dataverse_details(profile_id) if not dv_details: # dataverse = connection.create_dataverse(dv_alias, '{0} {1}'.format(u.first_name, u.last_name), u.email) dv_details = self._create_dataverse(profile_id) Profile().add_dataverse_details(profile_id, dv_details) return dv_details def _create_dataverse(self, meta, conn): alias = str(uuid.uuid4()) email = "" for f in meta["fields"]: if f["dc"] == "dc.title": name = f["vals"][0] if f["dc"] == "dc.email": email = f["vals"][0] if email == "": u = ThreadLocal.get_current_user() email = u.email dv = conn.create_dataverse(alias, name, email) return dv def _create_dataset(self, meta, dv, conn): dv.create_dataset() x = self._make_dataset_xml(meta) Dataset.from_xml_file() def _get_dataset(self, profile_id, dataFile_ids, dataverse): # create new dataset if none exists already ds_details = Profile().check_for_dataset_details(profile_id) if not ds_details: ds_details = self._create_dataset(dataFile_ids=dataFile_ids, dataverse=dataverse) Profile().add_dataverse_dataset_details(profile_id, ds_details) return ds_details def _make_dataset_xml(self, sub): meta = sub['meta'] # iterate through meta to get fields d = dict() datafile = DataFile().get_record(ObjectId(sub['bundle'][0])) df = datafile['description']['attributes'] xml = '<?xml version="1.0"?>' xml = xml + '<entry xmlns="http://www.w3.org/2005/Atom" xmlns:dcterms="http://purl.org/dc/terms/">' xml = xml + '<dcterms:contributor>' + "*****@*****.**" + '</dcterms:contributor>' for item in meta["fields"]: if type(item["vals"]) == type(""): tail = item["dc"].split(".")[1] xml = xml + "<dcterms:" + tail + '>' + item[ "vals"] + "</dcterms:" + tail + '>' elif type(item["vals"] == type(list())): for val in item["vals"]: tail = item["dc"].split(".")[1] xml = xml + '<dcterms:' + tail + '>' + val + '</dcterms:' + tail + '>' xml = xml + "</entry>" path = os.path.dirname(datafile['file_location']) xml_path = os.path.join(path, 'xml.xml') with open(xml_path, 'w+') as f: f.write(xml) return xml_path def _update_submission_record(self, sub, dataset, dataverse, dv_storageIdentifier=None): # add mongo_file id acc = dict() acc['storageIdentifier'] = dv_storageIdentifier acc['mongo_file_id'] = dataset.id acc['dataset_doi'] = dataset.doi acc['dataset_edit_media_uri'] = dataset.edit_media_uri acc['dataset_edit_uri'] = dataset.edit_uri acc['dataset_is_deleted'] = dataset.is_deleted acc['dataset_title'] = dataset.title acc['dataverse_title'] = dataset.dataverse.title acc['dataverse_alias'] = dataset.dataverse.alias acc['dataset_id'] = dataset._id # save accessions to mongo profile record 
        sub['accessions'] = acc
        sub['complete'] = True
        sub['target_id'] = str(sub.pop('_id'))
        Submission().save_record(dict(), **sub)
        Submission().mark_submission_complete(sub['target_id'])
        return True

    @staticmethod
    def _listize(value):
        # split string by comma (the original shadowed the 'list' builtin and
        # lacked a 'self' parameter or @staticmethod decorator)
        if value == '':
            return None
        return value.split(',')

    def publish_dataverse(self, sub_id):
        # get url for dataverse
        self.host = Submission().get_dataverse_details(sub_id)
        self.headers = {'X-Dataverse-key': self.host['apikey']}
        submission = Submission().get_record(sub_id)
        dvAlias = submission['accessions']['dataverse_alias']
        dsId = submission['accessions']['dataset_id']
        conn = self._get_connection()
        dv = conn.get_dataverse(dvAlias)
        # ds = dv.get_dataset_by_doi(dsDoi)
        if not dv.is_published:
            dv.publish()

        # POST http://$SERVER/api/datasets/$id/actions/:publish?type=$type&key=$apiKey
        url = submission['destination_repo']['url']
        url = url + '/api/datasets/' + str(dsId) + '/actions/:publish?type=major'
        print(url)
        resp = requests.post(url,
                             data={'type': 'major', 'key': self.host['apikey']},
                             headers=self.headers)
        # the original tested 'status_code != 200 or status_code != 201', which is
        # always true; test membership instead, and stringify the bytes content
        if resp.status_code not in (200, 201):
            raise OperationFailedError('The Dataset could not be published. ' + str(resp.content))
        doc = Submission().mark_as_published(sub_id)
        return doc

    def publish_dataset(self, dataset_id):
        url = self.host['url'] + '/api/datasets/' + str(dataset_id) + '/actions/:publish?type=major'
        resp = requests.post(url,
                             data={'type': 'major', 'key': self.host['apikey']},
                             headers=self.headers)
        if resp.status_code not in (200, 201):
            # the original had an unreachable 'return False' after this raise
            raise OperationFailedError('Dataset could not be published. ' + str(resp.content))
        return True

    def dc_dict_to_dc(self, sub_id):
        # get file metadata, call converter to strip out dc fields
        s = Submission().get_record(ObjectId(sub_id))
        f_id = s["bundle"][0]
        items = CgCoreSchemas().extract_repo_fields(str(f_id), "dataverse")

        temp_id = "copo:" + str(sub_id)
        # add the submission_id to the dataverse metadata to allow backwards
        # traversal from dataverse
        items.append({"dc": "dc.relation", "copo_id": "submission_id", "vals": temp_id})
        Submission().update_meta(sub_id, json.dumps(items))

    def get_registered_types(self):
        """
        function uses a schema mapping of Dataverse types to drive conversion
        from cgcore to dataverse metadata
        Schema source: https://docs.google.com/spreadsheets/d/13HP-jI_cwLDHBetn9UKTREPJ_F4iHdAvhjmlvmYdSSw/edit#gid=0
        :return:
        """
        df = pd.read_csv(os.path.join(RESOLVER["cg_core_utils"], 'dataverse_schema_mapping.csv'))
        df.value = df['value'].fillna('')
        df.parent = df['parent'].fillna('')
        df.copo_id = df['copo_id'].fillna('')
        df.rename(index=str, columns={"dataverse_id": "typeName"}, inplace=True)

        Attribute = namedtuple('Attribute',
                               ['typeName', 'typeClass', 'multiple', 'value', 'parent', 'copo_id'])
        registered_attributes = [Attribute(**x) for x in df.to_dict('records')]

        return registered_attributes

    def do_conversion(self):
        """
        function manages the conversion from CG Core to Dataverse types
        :return:
        """
        template = self.get_metadata_template()
        user_data = self.submission_record.get("meta", dict()).get("fields", list())

        citation_fragment = template["datasetVersion"]["metadataBlocks"]["citation"]
        citation_fragment["fields"] = self.get_dv_attributes(user_data=user_data)
        citation_fragment["displayName"] = self.get_display_name()

        return self.dump_metadata(template)

    def get_metadata_template(self):
        schemas_utils_paths = RESOLVER["cg_core_utils"]

        try:
            template = data_utils.json_to_pytype(
                os.path.join(schemas_utils_paths, 'dataverse_dataset_template.json'))
        except Exception as e:
self.report_error("Couldn't retrieve Dataverse template. " + str(e)) raise return template def get_dv_attributes(self, user_data): """ function sets attribute value for Dataverse fields from user data :param user_data: :return: """ fields = list() for attrib in self.get_registered_types(): # dependent attribute - ignore for now if attrib.parent: continue # predefined values elif type(attrib.value) is bool or attrib.value: field = dict(attrib._asdict()) if attrib.multiple is True: field['value'] = [field['value']] field.pop('parent', None) field.pop('copo_id', None) fields.append(field) # primitive and controlledVocabulary types elif attrib.typeClass in ["primitive", "controlledVocabulary"]: val = [ x["vals"] for x in user_data if x.get("copo_id", str()) == attrib.copo_id ] if val: val = val[0] field = self.get_dv_primitive(attrib, val) if field: fields.append(field) # compound type elif attrib.typeClass == "compound": children = [ x for x in self.get_registered_types() if x.parent == attrib.typeName ] if not children: continue values = list() children_values = dict() for child in children: # obtain predefined values predefined_children_values = list() if type(child.value) is bool or child.value: vals = child.value if attrib.multiple is True: vals = [vals] predefined_children_value = self.get_dv_primitive( child, vals) if predefined_children_value: predefined_children_values.append( predefined_children_value) continue vals = [ x["vals"] for x in user_data if x.get("copo_id", str()) == child.copo_id ] if vals: vals = vals[0] if not isinstance(vals, list): vals = [vals] for indx, vv in enumerate(vals): children_values.setdefault(indx, []).append( self.get_dv_primitive(child, vv)) for entry in children_values: new_dict = dict() for descendant in children_values[entry]: new_dict[descendant["typeName"]] = descendant # add predefined children values for descendant in predefined_children_values: new_dict[descendant["typeName"]] = descendant values.append(new_dict) field = self.get_dv_primitive( attrib, [1]) # pass any value to generate parent dict field["value"] = values fields.append(field) return fields def get_dv_primitive(self, attrib, val): """ function returns schema fragment for a dataverse primitive type, given val :param attrib: :param val: :return: """ field = dict() if isinstance(val, list) and attrib.multiple is False: value = val[0] elif not isinstance(val, list) and attrib.multiple is True: value = [val] else: value = val if value: field = dict(attrib._asdict()) field['value'] = value field.pop('parent', None) field.pop('copo_id', None) return field def get_display_name(self): """ sets display name for Dataset :return: """ profile = DAComponent(component="profile").get_record( self.submission_record.get("profile_id", str())) return profile.get("title", str()) def dump_metadata(self, dv_metadata): """ function write converted metadata to file and returns the path on success :return: """ # create submission file path if not os.path.exists(self.file_path): try: os.makedirs(self.file_path) except Exception as e: self.report_error("Error creating submission file path. " + str(e)) raise path_to_json = os.path.join(self.file_path, 'dataset.json') try: with open(path_to_json, "w") as ff: ff.write(json.dumps(dv_metadata)) except Exception as e: self.report_error("Error writing Dataset metadata to file. 
" + str(e)) raise return path_to_json def report_error(self, error_message): print(error_message) try: lg.log('Submission ID: ' + self.submission_id + " " + error_message, level=Loglvl.ERROR, type=Logtype.FILE) except Exception as e: pass return False
def _do_aspera_transfer(self, user_name=None, password=None, remote_path=None,
                        file_path=None, path2library=None):
    lg.log('Starting aspera transfer', level=Loglvl.INFO, type=Logtype.FILE)

    kwargs = dict(target_id=self.submission_id, commenced_on=str(datetime.now()))
    Submission().save_record(dict(), **kwargs)

    f_str = ' '.join(file_path)
    cmd = "./ascp -d -QT -l700M -L- {f_str!s} {user_name!s}:{remote_path!s}".format(**locals())
    lg.log(cmd, level=Loglvl.INFO, type=Logtype.FILE)

    os.chdir(path2library)

    try:
        thread = pexpect.spawn(cmd, timeout=None)
        thread.expect(["assword:", pexpect.EOF])
        thread.sendline(password)

        cpl = thread.compile_pattern_list([pexpect.EOF, '(.+)'])

        while True:
            i = thread.expect_list(cpl, timeout=None)

            if i == 0:  # EOF! Possible error point if encountered before transfer completion
                print("Process termination - check exit status!")
                break
            elif i == 1:
                pexp_match = thread.match.group(1)
                prev_file = ''
                tokens_to_match = ["Mb/s", "status=success", "status=started"]
                units_to_match = ["KB", "MB", "GB"]
                rates_to_match = ["Kb/s", "kb/s", "Mb/s", "mb/s", "Gb/s", "gb/s"]
                time_units = ['d', 'h', 'm', 's']
                end_of_transfer = False

                if any(tm in pexp_match.decode("utf-8") for tm in tokens_to_match):
                    transfer_fields = dict()
                    tokens = pexp_match.decode("utf-8").split(" ")
                    lg.log(tokens, level=Loglvl.INFO, type=Logtype.FILE)

                    # has a file transfer started?
                    if 'status=started' in tokens:
                        # get the target file and update transfer record
                        target_file = [tk for tk in tokens
                                       if tk[:5] == "file=" or tk[:7] == "source="]

                        for up_f in target_file:
                            up_f_1 = up_f.split("=")[1].strip('"')

                            # update file path and datafile id
                            transfer_fields["file_path"] = up_f_1

                            submission_record = Submission().get_record(self.submission_id)
                            bundle_meta = submission_record.get("bundle_meta", list())
                            listed_file = [indx for indx, elem in enumerate(bundle_meta)
                                           if elem['file_path'] == up_f_1]
                            if listed_file:
                                transfer_fields["datafile_id"] = bundle_meta[listed_file[0]]["file_id"]

                            # get original file size
                            file_size_bytes = [x for x in tokens if len(x) > 5 and x[:4] == 'size']
                            if file_size_bytes:
                                t = file_size_bytes[0].split("=")[1]
                                transfer_fields["file_size_bytes"] = size(int(t), system=alternative)

                    # extract other file transfer metadata
                    if 'ETA' in tokens:
                        # get %completed, bytes transferred, current time etc
                        pct_completed = [x for x in tokens if len(x) > 1 and x[-1] == '%']
                        if pct_completed:
                            transfer_fields["pct_completed"] = pct_completed[0][:-1]
                            print(str(self.transfer_token) + ": " +
                                  transfer_fields["pct_completed"] + "% transferred")

                        # bytes transferred
                        bytes_transferred = [x for x in tokens
                                             if len(x) > 2 and x[-2:] in units_to_match]
                        if bytes_transferred:
                            transfer_fields["bytes_transferred"] = bytes_transferred[0]

                        # transfer rate
                        transfer_rate = [x for x in tokens
                                         if len(x) > 4 and x[-4:] in rates_to_match]
                        if transfer_rate:
                            transfer_fields["transfer_rate"] = transfer_rate[0]

                    # current time - this will serve as the last time an activity was recorded
                    transfer_fields["current_time"] = datetime.now().strftime("%d-%m-%Y %H:%M:%S")

                    # has a file been successfully transferred?
                    if 'status=success' in tokens:
                        # get the target file and update its status in the submission record
                        target_file = [tk for tk in tokens
                                       if tk[:5] == "file=" or tk[:7] == "source="]

                        for up_f in target_file:
                            up_f_1 = up_f.split("=")[1].strip('"')

                            submission_record = Submission().get_record(self.submission_id)
                            bundle_meta = submission_record.get("bundle_meta", list())
                            listed_file = [indx for indx, elem in enumerate(bundle_meta)
                                           if elem['file_path'] == up_f_1]
                            if listed_file:
                                bundle_meta[listed_file[0]]["upload_status"] = True
                                kwargs = dict(target_id=self.submission_id, bundle_meta=bundle_meta)
                                Submission().save_record(dict(), **kwargs)

                            # is this the final file to be transferred?
                            submission_record = Submission().get_record(self.submission_id)
                            if "bundle_meta" in submission_record:
                                pending_files = [x["file_id"]
                                                 for x in submission_record['bundle_meta']
                                                 if not x["upload_status"]]
                                if not pending_files:
                                    # we are all done!
                                    transfer_fields["transfer_status"] = "completed"
                                    transfer_fields["pct_completed"] = '100'
                                    transfer_fields["current_time"] = datetime.now().strftime(
                                        "%d-%m-%Y %H:%M:%S")

                    # save collected metadata to the transfer record
                    RemoteDataFile().update_transfer(self.transfer_token, transfer_fields)

        thread.close()
        lg.log('Aspera Transfer completed', level=Loglvl.INFO, type=Logtype.FILE)
    except OSError:
        transfer_fields = dict()
        transfer_fields["error"] = "Encountered problems with file upload."
        transfer_fields["current_time"] = datetime.now().strftime("%d-%m-%Y %H:%M:%S")

        lg.log('File upload error! Submission ID: ' + self.submission_id,
               level=Loglvl.ERROR, type=Logtype.FILE)

        # save error to transfer record
        RemoteDataFile().update_transfer(self.transfer_token, transfer_fields)

        return False
    finally:
        pass

    self.context["ena_status"] = "files_transferred"
    return
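# The loop above recognises ascp progress lines purely by token matching. The
# standalone helper below is a sketch of that extraction applied to a single
# decoded line; the sample line in the comment is illustrative, not captured
# ascp output.
def _parse_ascp_progress(line):
    """Sketch: pull percent-complete and transfer rate out of one decoded
    ascp status line, using the same token tests as _do_aspera_transfer."""
    tokens = line.split(" ")
    rates = ("Kb/s", "kb/s", "Mb/s", "mb/s", "Gb/s", "gb/s")
    pct = next((t[:-1] for t in tokens if len(t) > 1 and t[-1] == '%'), None)
    rate = next((t for t in tokens if len(t) > 4 and t[-4:] in rates), None)
    return pct, rate

# e.g. _parse_ascp_progress("run1.fastq.gz 42% 512MB 700Mb/s ETA 00:12")
# returns ("42", "700Mb/s")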