def get_upload_information(request):
    submission_id = request.GET.get('submission_id')
    if not submission_id:
        data = {'found': False}
        return HttpResponse(json.dumps(data))
    # get submission record and check status
    sub = Submission().get_record(submission_id)
    if sub:
        if not sub['complete'] or sub['complete'] == 'false':
            rem = RemoteDataFile().get_by_sub_id(submission_id)
            if rem:
                speeds = rem['transfer_rate'][-100:]
                complete = rem['pct_completed']
                data = {'speeds': speeds, 'complete': complete, 'finished': False, 'found': True}
                return HttpResponse(json.dumps(data))
        else:
            data = {
                'sub_id': str(sub['_id']),
                'status': sub['status'],
                'accessions': sub['accessions'],
                'repo': sub['repository'],
                'completed_on': sub['completed_on'].strftime("%Y-%m-%d %H:%M:%S"),
                'article_id': sub.get('article_id'),
                'finished': True,
                'found': True
            }
            return HttpResponse(json.dumps(data))
    data = {'found': False}
    return HttpResponse(json.dumps(data))
def test_ckan_new_submission(self):
    # pass the submission to the submit endpoint
    s = Submission().get_record(self.s_ckan_new)
    request = self.client.post(path='/rest/submit_to_repo/', data={"sub_id": s["_id"]})
    s = Submission().get_record(self.s_ckan_new)
    self.assertTrue("result" in s["accessions"][0])
    self.assertTrue("id" in s["accessions"][0]["result"])
def test_submit_existing_cgcore_dataverse(self):
    # test submission of a COPO CG Core record to an existing dataset within a dataverse
    s = Submission().get_record(self.s_dv)
    request = self.client.post(path='/rest/submit_to_repo/', data={"sub_id": s["_id"]})
    s = Submission().get_record(self.s_dv)
    self.assertTrue("result" in s["accessions"][0])
    self.assertTrue("id" in s["accessions"][0]["result"])
def dc_dict_to_dc(self, sub_id):
    # get file metadata and call the converter to strip out Dublin Core fields
    s = Submission().get_record(ObjectId(sub_id))
    f_id = s["bundle"][0]
    items = CgCoreSchemas().extract_repo_fields(str(f_id), "ckan")
    Submission().update_meta(sub_id, json.dumps(items))
def get_repo_info(request, sub=None):
    # ajax method called when the user clicks the "inspect repo" button on the submission view
    try:
        if not sub:
            sub_id = request.GET['sub_id']
        else:
            sub_id = sub
        s = Submission().get_record(ObjectId(sub_id))
        repo = s['destination_repo']
        # if the submission type is cg_core, convert from the interim format to Dublin Core
        if s["is_cg"]:
            if repo["type"] == "dataverse":
                ds().dc_dict_to_dc(sub_id)
            elif repo["type"] == "ckan":
                ckan().dc_dict_to_dc(sub_id)
            elif repo["type"] == "dspace":
                dspace().dc_dict_to_dc(sub_id)
    except Exception as e:
        print(e)
        return HttpResponse(json.dumps({"status": 404, "message": "error getting repository information"}))
    s = Submission().get_record(ObjectId(sub_id))
    out = {
        'repo_type': repo['type'],
        'repo_url': repo['url'],
        'meta': s["meta"]
    }
    return HttpResponse(json.dumps(out))
def update_submission_repo_data(request):
    task = request.POST['task']
    submission_id = request.POST['submission_id']
    if task == 'change_destination':
        custom_repo_id = request.POST['custom_repo_id']
        s = Submission().update_destination_repo(repo_id=custom_repo_id, submission_id=submission_id)
        s['record_id'] = str(submission_id)
        clear_submission_metadata(request)
        get_repo_info(request, sub=submission_id)
        return HttpResponse(json_util.dumps(s))
    elif task == 'change_meta':
        meta = json.loads(request.POST['meta'])
        new_or_existing = meta["new_or_existing"]
        if request.POST.get("type") == "dspace":
            if new_or_existing == "new":
                r_type = request.POST["type"]
                # add metadata to a separate dict field
                meta["new_or_existing"] = new_or_existing
                meta["repo_type"] = r_type
                m = Submission().get_record(ObjectId(submission_id))["meta"]
                meta["fields"] = m
        elif request.POST.get("type") == "dataverse" or request.POST.get("type") == "ckan":
            if new_or_existing == "new":
                m = Submission().get_record(ObjectId(submission_id))["meta"]
                meta["fields"] = m
                meta["repo_type"] = request.POST["type"]
        # now update the submission record
        if isinstance(meta, dict):
            meta = json.dumps(meta)
        s = Submission().update_meta(submission_id=submission_id, meta=meta)
        return HttpResponse(json.dumps(s))
def __init__(self, submission_id=str()):
    self.submission_id = submission_id
    self.submission_record = dict()
    self.file_path = str()
    self.host = str()
    self.api_token = str()
    self.headers = dict()

    if self.submission_id:
        # get submission record
        self.submission_record = Submission().get_record(self.submission_id)

        # set up submission parameters...
        # submission path
        data_dir = os.path.join(os.path.dirname(__file__), "data")
        self.file_path = os.path.join(os.path.join(data_dir, self.submission_id), 'dataverse')

        # dataverse host
        self.host = self.submission_record.get("destination_repo", dict()).get("url", str())

        # api token
        self.api_token = self.submission_record.get("destination_repo", dict()).get("apikey", str())

        # headers
        self.headers = {'X-Dataverse-key': self.api_token}
def publish_dataverse(self, sub_id):
    # get url and api key for the dataverse
    self.host = Submission().get_dataverse_details(sub_id)
    self.headers = {'X-Dataverse-key': self.host['apikey']}
    submission = Submission().get_record(sub_id)
    dvAlias = submission['accessions']['dataverse_alias']
    dsId = submission['accessions']['dataset_id']

    conn = self._get_connection()
    dv = conn.get_dataverse(dvAlias)
    if not dv.is_published:
        dv.publish()

    # POST http://$SERVER/api/datasets/$id/actions/:publish?type=$type&key=$apiKey
    url = submission['destination_repo']['url']
    url = url + '/api/datasets/' + str(dsId) + '/actions/:publish?type=major'
    print(url)
    resp = requests.post(url, data={'type': 'major', 'key': self.host['apikey']}, headers=self.headers)
    if resp.status_code not in (200, 201):
        raise OperationFailedError('The dataset could not be published. ' + resp.content.decode('utf-8'))
    doc = Submission().mark_as_published(sub_id)
    return doc
def tearDownClass(cls):
    u = User.objects.get(pk=1)
    u.delete()
    Profile().get_collection_handle().remove({"copo_id": "000000000"})
    DataFile().get_collection_handle().remove({"_id": cls.d})
    # Submission().get_collection_handle().remove({"_id": cls.s_dv})
    Submission().get_collection_handle().remove({"_id": cls.s_ckan_new})
    Submission().get_collection_handle().remove({"_id": cls.s_ckan_existing})
def tearDownClass(cls):
    u = User.objects.get(username=settings.TEST_USER_NAME)
    u.delete()
    Profile().get_collection_handle().remove({"copo_id": "000000000"})
    DataFile().get_collection_handle().remove({"test_file": True})
    Repository().get_collection_handle().remove({"_id": cls.r["_id"]})
    Submission().get_collection_handle().remove({"_id": cls.s_dv})
    Submission().get_collection_handle().remove({"_id": cls.s_ds_new})
    Submission().get_collection_handle().remove({"_id": cls.s_ds_existing})
def process_request(self, request):
    url = request.get_full_path()
    if url.startswith('/copo'):
        doc = Submission().get_incomplete_submissions_for_user(request.user.id, figshare)
        data_dict = dict()
        token = None
        if doc.count() > 0:
            if 'code' in request.GET and 'state' in request.GET:
                token_obtained = True
                for d in doc:
                    if d.get('token_obtained') == 'false':
                        token_obtained = False
                        break
                if not token_obtained:
                    # get a new token from Figshare
                    code = request.GET.get('code')
                    client_id = FIGSHARE_CREDENTIALS['client_id']
                    token_url = FIGSHARE_API_URLS['authorization_token']
                    # now get the token
                    data = {
                        'client_id': client_id,
                        'code': code,
                        'client_secret': FIGSHARE_CREDENTIALS['client_secret'],
                        'grant_type': 'authorization_code',
                        'scope': 'all'
                    }
                    try:
                        r = requests.post(token_url, data)
                        data_dict = ast.literal_eval(r.content.decode('utf-8'))
                        token = data_dict['token']
                        t = Figshare().put_token_for_user(user_id=ThreadLocal.get_current_user().id, token=token)
                        if t:
                            # mark figshare submissions for this user as token obtained
                            Submission().mark_all_token_obtained(user_id=request.user.id)
                        # if all is well, the access token is stored in the Figshare submission collection
                    except Exception as e:
                        print(e)
                else:
                    # retrieve the stored token
                    token = Figshare().get_token_for_user(user_id=ThreadLocal.get_current_user().id)
        else:
            request.session['partial_submissions'] = None
def submit(self, sub_id, dataFile_ids):
    profile_id = data_utils.get_current_request().session.get('profile_id')
    s = Submission().get_record(ObjectId(sub_id))

    # get url and api key for the dataverse
    self.host = Submission().get_dataverse_details(sub_id)
    self.headers = {'X-Dataverse-key': self.host['apikey']}

    # if a dataset id is in the submission meta, we are adding to an existing dataset;
    # otherwise we are creating a new dataset
    new_or_existing = s['meta']['new_or_existing']
    return self._add_to_dspace(s, new_or_existing)
def test_dataverse_submission(self):
    s = Submission().get_record(self.s_dv)
    request = self.client.post(path='/rest/submit_to_repo/', data={"sub_id": s["_id"]})
    self.assertEqual(request.status_code, 200, "error submitting to dataverse")
    s = Submission().get_record(self.s_dv)
    self.assertTrue("accessions" in s, "accessions not in submission")
    self.assertTrue(s["accessions"]["dataset_doi"].startswith("doi"), "doi not present in submission")
    self.assertTrue(s["accessions"]["dataset_edit_uri"].startswith("http"), "edit uri not present in submission")
def dc_dict_to_dc(self, sub_id):
    # get file metadata and call the converter to strip out Dublin Core fields
    s = Submission().get_record(ObjectId(sub_id))
    f_id = s["bundle"][0]
    items = CgCoreSchemas().extract_repo_fields(str(f_id), "dataverse")
    temp_id = "copo:" + str(sub_id)
    # add the submission_id to the dataverse metadata to allow backwards traversal from dataverse
    items.append({
        "dc": "dc.relation",
        "copo_id": "submission_id",
        "vals": temp_id
    })
    Submission().update_meta(sub_id, json.dumps(items))
def resolve_submission_id(request, submission_id):
    sub = Submission().get_record(submission_id)

    # get all file metadata
    output = dict()
    files = list()
    for f in sub.get("bundle", list()):
        file = DataFile().get_record(f)
        files.append(file["description"]["attributes"])
    output["files"] = files
    output["accessions"] = sub["accessions"]
    output["metadata"] = {}
    output["metadata"]["dc"] = sub["meta"]["fields"]
    return HttpResponse(j.dumps(output))
def setUpClass(cls):
    settings.UNIT_TESTING = True

    # create user
    cls.user = User.objects.create_user(username='******', first_name="jonny", last_name="appleseed",
                                        email='*****@*****.**', password='******')
    cls.user.save()

    # create profile
    p_dict = {"copo_id": "000000000", "description": "Test Description", "user_id": 1,
              "title": "Test Title"}
    cls.pid = Profile().save_record(dict(), **p_dict)

    # create datafile
    p = os.path.join(os.path.dirname(os.path.realpath(__file__)), "fixtures", "dummy_datafile_cgcore.json")
    with open(p) as f:
        p_dict = json.loads(f.read())
    p_dict["file_location"] = os.path.join(os.path.dirname(os.path.realpath(__file__)), "fixtures", "fish.png")
    p_dict["name"] = "fish.png"
    profile = Profile().get_collection_handle().find_one({"copo_id": "000000000"})
    p_dict["profile_id"] = str(cls.pid["_id"])
    cls.d = DataFile().get_collection_handle().insert(p_dict)

    # create submission
    p = os.path.join(os.path.dirname(os.path.realpath(__file__)), "fixtures",
                     "dummy_cgcore_dataverse_submission_existing.json")
    with open(p) as f:
        p_dict = json.loads(f.read())
    p_dict["bundle_meta"][0]["file_path"] = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                                                         "fixtures", "fish.png")
    p_dict["bundle_meta"][0]["file_id"] = str(cls.d)
    p_dict["profile_id"] = str(cls.pid["_id"])
    p_dict["bundle"].append(str(cls.d))
    cls.s_dv = Submission().get_collection_handle().insert(p_dict)
def publish_article(self, article_id):
    endpoint = 'account/articles/{}/publish'.format(article_id)
    post = self.BASE_URL.format(endpoint=endpoint)
    resp = requests.post(post, headers=self.HEADERS)
    if resp.status_code in (200, 201):
        Submission().mark_figshare_article_published(article_id)
    return resp
def do_submission_xml(sub_id):
    sub = Submission().get_record(sub_id)
    dfs = list()
    for d in sub["bundle"]:
        dfs.append(DataFile().get_record(d))
    df = dfs[0]

    submission = Element("SUBMISSION")

    # get names of files in bundle and append here
    # do alias
    alias = make_alias(sub)
    submission.set("alias", alias + "_sub")
    submission.set("broker_name", df["description"]["attributes"]["study_type"]["study_broker"])
    submission.set("center_name",
                   df["description"]["attributes"]["study_type"]["study_analysis_center_name"])
    submission_date = datetime.datetime.now().isoformat()
    submission.set("submission_date", submission_date)
    submission.set("xmlns:xsi", "http://www.w3.org/2001/XMLSchema-instance")
    submission.set("xsi:noNamespaceSchemaLocation",
                   "ftp://ftp.sra.ebi.ac.uk/meta/xsd/sra_1_5/SRA.submission.xsd")

    contacts = Element("CONTACTS")
    copo_contact = Element("CONTACT")
    copo_contact.set("inform_on_error", "*****@*****.**")
    copo_contact.set("inform_on_status", "*****@*****.**")
    copo_contact.set("name", "COPO Support")
    contacts.append(copo_contact)

    people = Person(sub["profile_id"]).get_people_for_profile()
    for p in people:
        c = Element("CONTACT")
        c.set("name", p["firstName"] + " " + p["lastName"])
        if [x for x in p["roles"] if x["annotationValue"] == "SRA Inform On Status"]:
            c.set("inform_on_status", p["email"])
        if [x for x in p["roles"] if x["annotationValue"] == "SRA Inform On Error"]:
            c.set("inform_on_error", p["email"])
        contacts.append(c)
    submission.append(contacts)

    actions = Element("ACTIONS")
    action = Element("ACTION")
    add = Element("ADD")
    add.set("schema", "analysis")
    add.set("source", "analysis.xml")
    action.append(add)
    actions.append(action)
    submission.append(actions)

    return prettify(submission)
def do_get_submission_accessions(self):
    target_id = self.param_dict.get("target_id", str())
    submission_record = Submission().get_record(target_id)

    self.context["submission_accessions"] = htags.generate_submission_accessions_data(submission_record)

    return self.context
def __init__(self, sub_id=None):
    if sub_id:
        self.host = Submission().get_dataverse_details(sub_id)
        self.headers = {'X-CKAN-API-Key': self.host['apikey']}
        self.hostname = self.host["url"]
        if self.host["url"].endswith(".org"):
            self.host["url"] = self.host["url"] + "/api/3/action/"
def _add_to_dataverse(self):
    """
    adds datafiles to a dataset
    :return:
    """
    sub = self.submission_record

    # check for dataverse alias
    alias = sub.get("meta", dict()).get("dataverse_alias", str()) or \
        sub.get("meta", dict()).get("alias", str())
    if not alias:
        return {"status": 404, "message": "\n Error getting dataverse"}

    # check for dataset doi
    doi = sub.get("meta", dict()).get("doi", str())
    if not doi:
        return {"status": 404, "message": "\n Error getting dataset"}

    # add file to dataset
    result = self.send_files_curl(persistent_id=doi)

    if result is True:
        # store accessions and clear submission metadata
        dv_response_data = self.get_dataverse_details(alias)
        ds_response_data = self.get_dataset_details(doi)

        dataset_title = [
            x["value"] for x in ds_response_data.get("latestVersion", dict())
            .get("metadataBlocks", dict()).get("citation", dict()).get("fields", dict())
            if x.get("typeName", str()) == "title"
        ]

        acc = dict()
        acc['dataset_id'] = ds_response_data.get("id", str())
        acc['dataset_doi'] = doi
        acc['dataverse_alias'] = alias
        acc['dataverse_title'] = dv_response_data.get("name", "N/A")
        acc['dataset_title'] = "N/A"

        if dataset_title:
            if isinstance(dataset_title, list):
                acc['dataset_title'] = dataset_title[0]
            elif isinstance(dataset_title, str):
                acc['dataset_title'] = dataset_title

        sub['accessions'] = acc
        sub['target_id'] = sub.pop('_id', self.submission_id)
        Submission().save_record(dict(), **sub)
        self.clear_submission_metadata()

    return result
def test_dspace_existing_submission(self):
    request = self.client.post(path='/rest/submit_to_repo/', data={"sub_id": self.s_ds_existing})
    self.assertEqual(request.status_code, 200)
    s = Submission().get_record(self.s_ds_existing)
    self.assertTrue(s["accessions"][0]["dspace_instance"].startswith("http"))
    self.assertTrue("uuid" in s["accessions"][0])
    self.assertTrue(s["accessions"][0]["retrieveLink"].startswith("/rest/bitstreams/"))
def get_existing_metadata(request):
    # get base metadata for the view showing a new dspace item
    try:
        sub_id = request.GET["submission_id"]
    except KeyError:
        return HttpResponse(json.dumps({}))
    sub = Submission().get_record(ObjectId(sub_id))
    out = sub["meta"]
    return HttpResponse(json.dumps(out))
def _do_file_transfer(self):
    submission_record = Submission().get_record(self.submission_id)

    # do we have files to be uploaded?
    bundle_df = pd.DataFrame(submission_record.get("bundle_meta", list()))
    if len(bundle_df) == 0:
        # insufficient information to proceed - no bundle meta
        return

    pending_df = bundle_df[bundle_df['upload_status'] == False]

    if len(pending_df) > 0:
        path2library = os.path.join(BASE_DIR, REPOSITORIES['ASPERA']['resource_path'])
        user_name = REPOSITORIES['ASPERA']['user_token']
        password = REPOSITORIES['ASPERA']['password']

        # compose remote file directory
        remote_path = d_utils.get_ena_remote_path(self.submission_id)

        self._do_aspera_transfer(user_name=user_name, password=password, remote_path=remote_path,
                                 file_path=list(pending_df['file_path']), path2library=path2library)
    else:
        # no files to be uploaded
        transfer_fields = dict()
        transfer_fields["transfer_status"] = "completed"
        transfer_fields["pct_completed"] = '100'
        transfer_fields["current_time"] = datetime.now().strftime("%d-%m-%Y %H:%M:%S")

        # save collected metadata to the transfer record
        RemoteDataFile().update_transfer(self.transfer_token, transfer_fields)
        self.context["ena_status"] = "files_transferred"

    return
def _update_dspace_submission(self, sub, dspace_url, data_id, item_id):
    data_url = dspace_url + "/rest/bitstreams/" + str(data_id)
    meta_url = dspace_url + "/rest/items/" + str(item_id) + "?expand=all"
    resp = requests.get(data_url)
    data = json.loads(resp.content.decode('utf-8'))
    if "uuid" not in data:
        data["uuid"] = data.pop("id")
    data['dspace_instance'] = dspace_url
    data["item_id"] = item_id
    data["meta_url"] = meta_url
    Submission().insert_dspace_accession(sub, data)
def get_dataverse_content(request):
    dv_id = request.GET['id']
    url = Submission().get_dataverse_details(request.GET['submission_id'])
    dv_url = url['url'] + '/api/v1/dataverses/' + dv_id + '/contents'
    resp_dv = requests.get(dv_url).content.decode('utf-8')
    ids = json.loads(resp_dv)
    if not ids['data']:
        return HttpResponse(json.dumps({"no_datasets": "No datasets found in this dataverse."}))
    return HttpResponse(json.dumps(ids['data']))
def publish_figshare(request):
    sub_id = request.POST['submission_id']
    s = Submission().get_record(sub_id)
    resp = FigshareSubmit(sub_id).publish_article(s['accession'])
    return HttpResponse(json.dumps({
        'status_code': resp.status_code,
        'location': json.loads(resp.content.decode('utf8'))['location']
    }))
def submit(self, sub_id, dataFile_ids):
    submission_record = Submission().get_record(sub_id)

    # bundle_meta, if present, gives a better picture of which datafiles still need to be uploaded
    if "bundle_meta" in submission_record:
        pending_files = [x["file_id"] for x in submission_record['bundle_meta'] if not x["upload_status"]]
        dataFile_ids = pending_files

    # physically transfer files
    path2library = os.path.join(BASE_DIR, REPOSITORIES['ASPERA']['resource_path'])

    # change these to be collected properly
    user_name = REPOSITORIES['ASPERA']['user_token']
    password = REPOSITORIES['ASPERA']['password']

    # create transfer record
    transfer_token = RemoteDataFile().create_transfer(sub_id)['_id']
    self.submission = Submission().get_record(sub_id)
    self.profile = Profile().get_record(self.submission['profile_id'])
    remote_path = d_utils.get_ena_remote_path(sub_id)

    # get each file in the bundle
    file_path = []
    for idx, f_id in enumerate(dataFile_ids):
        mongo_file = DataFile().get_record(ObjectId(f_id))
        self.d_files.append(mongo_file)
        file_path.append(mongo_file.get("file_location", str()))

    case = self._do_aspera_transfer(transfer_token=transfer_token, user_name=user_name, password=password,
                                    remote_path=remote_path, file_path=file_path,
                                    path2library=path2library, sub_id=sub_id)
    return case
def search_dataverse(request):
    box = request.GET['box']
    q = request.GET['q']
    url = Submission().get_dataverse_details(request.GET['submission_id'])
    dv_url = url['url'] + '/api/v1/search'
    payload = {'q': q, 'per_page': 100, 'show_entity_ids': True, 'type': box}
    resp = requests.get(url=dv_url, params=payload)
    if resp.status_code != 200:
        return HttpResponse(None)
    resp = resp.content.decode('utf-8')
    return HttpResponse(resp)
def do_study_xml(sub_id):
    # get submission object from mongo
    sub = Submission().get_record(sub_id)

    # get datafile objects
    dfs = list()
    for d in sub["bundle"]:
        dfs.append(DataFile().get_record(d))
    df = dfs[0]

    # get profile object
    p = Profile().get_record(df["profile_id"])

    # do STUDY_SET
    study_set = Element("STUDY_SET")
    study_set.set("xmlns:xsi", "http://www.w3.org/2001/XMLSchema-instance")
    study_set.set("xsi:noNamespaceSchemaLocation",
                  "ftp://ftp.sra.ebi.ac.uk/meta/xsd/sra_1_5/SRA.study.xsd")

    # do STUDY
    study = Element("STUDY")
    study.set("alias", str(sub["_id"]))
    study.set("center_name",
              df["description"]["attributes"]["study_type"]["study_analysis_center_name"])
    study_set.append(study)

    # do DESCRIPTOR
    descriptor = Element("DESCRIPTOR")
    # create element, append to parent and add text
    SubElement(descriptor, "STUDY_TITLE").text = p["title"]
    study_type = Element("STUDY_TYPE")
    es = get_study_type_enumeration(df["description"]["attributes"]["study_type"]["study_type"])
    study_type.set("existing_study_type", es)
    descriptor.append(study_type)
    SubElement(descriptor, "STUDY_ABSTRACT").text = p["description"]
    study.append(descriptor)

    # do STUDY_ATTRIBUTES
    study_attributes = Element("STUDY_ATTRIBUTES")
    # do attribute for date
    study_attribute = Element("STUDY_ATTRIBUTE")
    SubElement(study_attribute, "TAG").text = "Submission Date"
    SubElement(study_attribute, "VALUE").text = datetime.datetime.now().strftime('%Y-%m-%d')
    study_attributes.append(study_attribute)
    # here we can loop to add other STUDY_ATTRIBUTES
    study.append(study_attributes)

    return prettify(study_set)
def copo_get_submission_table_data(request):
    profile_id = request.POST.get('profile_id')
    submission = Submission(profile_id=profile_id).get_all_records(sort_by="date_created", sort_direction="-1")
    for s in submission:
        s['date_created'] = s['date_created'].strftime('%d %b %Y - %I:%M %p')
        s['date_modified'] = s['date_modified'].strftime('%d %b %Y - %I:%M %p')
        s['display_name'] = REPO_NAME_LOOKUP[s['repository']]
        if s['complete'] == 'false' or s['complete'] == False:
            s['status'] = 'Pending'
        else:
            s['status'] = 'Submitted'
    out = j.dumps(submission)
    return HttpResponse(out)
def get_existing_study_options():
    from dal.copo_da import Submission
    subs = Submission().get_complete()
    out = list()
    out.append({"value": "required", "label": "-- select one --"})
    out.append({"value": "none", "label": "Not in COPO"})
    for s in subs:
        try:
            out.append({
                "value": s['profile_id'],
                "label": s['accessions']['project']['accession']
            })
        except (KeyError, TypeError):
            # skip submissions without a project accession
            pass
    return out
def _submit(self, sub_id, dataFile_ids):
    for f_id in dataFile_ids:
        mongo_file = DataFile().get_record(f_id)
        c = ChunkedUpload.objects.get(pk=int(mongo_file["file_id"]))
        file_path = os.path.join(self.MEDIA_ROOT, str(c.file))
        orig_name = c.filename
        sub = mongo_file['description']['attributes']

        # build the article metadata from the datafile description
        data = dict()
        data['defined_type'] = sub.get('type_category', dict()).get('type')
        data['title'] = sub.get('title_author_description', dict()).get('title')
        authors = sub.get('title_author_description', dict()).get('author').split(',')
        lst = list()
        for x in authors:
            lst.append({'name': x})
        data['authors'] = lst
        data['description'] = sub.get('title_author_description', dict()).get('description')
        cat = sub.get('type_category', dict()).get('categories')
        cat = cat.split(',')
        cat = list(map(int, cat))
        data['categories'] = cat
        data['tags'] = sub.get('tags', dict()).get('keywords').split(',')
        data['references'] = sub.get('tags', dict()).get('references').split(',')
        for idx, x in enumerate(data['references']):
            if not x.startswith('http'):
                if not x.startswith('www'):
                    data['references'][idx] = 'http://www.' + x
                else:
                    data['references'][idx] = 'http://' + x
        data['funding'] = sub.get('tags', dict()).get('funding')
        data['licenses'] = sub.get('tags', dict()).get('licenses')
        data['publish'] = sub.get('figshare_publish', dict()).get('should_publish')

        # create article
        endpoint = 'account/articles'
        resp = requests.post(self.BASE_URL.format(endpoint=endpoint), headers=self.HEADERS,
                             data=json.dumps(data))
        article_id = json.loads(resp.content.decode('utf8'))['location'].rsplit('/', 1)[1]

        # get file info
        size = c.offset
        info = json.dumps({'name': orig_name, 'size': size})

        # initiate upload
        endpoint = 'account/articles/{}/files'.format(article_id)
        resp = requests.post(self.BASE_URL.format(endpoint=endpoint), headers=self.HEADERS, data=info)
        file_id = json.loads(resp.content.decode('utf-8'))['location'].rsplit('/', 1)[1]

        # get upload/parts info
        endpoint = 'account/articles/{}/files/{}'.format(article_id, file_id)
        resp = requests.get(self.BASE_URL.format(endpoint=endpoint), headers=self.HEADERS)
        url = '{upload_url}'.format(**json.loads(resp.content.decode('utf-8')))
        parts = json.loads(requests.get(url).content.decode('utf-8'))['parts']

        # start upload timer
        t = datetime.datetime.now()

        # upload parts
        with open(file_path, 'rb') as fin:
            for idx, part in enumerate(parts):
                percent_done = idx / len(parts) * 100
                size = part['endOffset'] - part['startOffset'] + 1
                address = '{}/{}'.format(url, part['partNo'])
                x = datetime.datetime.now()
                requests.put(address, data=fin.read(size))
                delta = datetime.datetime.now() - x
                # calculate current upload rate in MB per second
                bw = (size / delta.total_seconds()) / 1000 / 1000
                fields = {'transfer_rate': bw, 'pct_completed': percent_done}
                RemoteDataFile().update_transfer(self.transfer_token, fields)

        # mark file upload as completed
        upload_time = datetime.datetime.now() - t
        requests.post(self.BASE_URL.format(endpoint=endpoint), headers=self.HEADERS)
        fields = {'pct_completed': 100, 'transfer_status': 'success',
                  'completed_on': str(datetime.datetime.now()), 'article_id': article_id}
        RemoteDataFile().update_transfer(self.transfer_token, fields)

        if data['publish'] == 'True':
            # publish via the api
            endpoint = 'account/articles/{}/publish'.format(article_id)
            resp = requests.post(self.BASE_URL.format(endpoint=endpoint), headers=self.HEADERS)
            location = json.loads(resp.content.decode('utf8'))['location']

            # get accession data
            endpoint = 'articles/{}'.format(article_id)
            resp = requests.get(self.BASE_URL.format(endpoint=endpoint), headers=self.HEADERS)

            # save accessions to the mongo submission record
            s = Submission().get_record(sub_id)
            s['accession'] = json.loads(resp.content.decode('utf8'))['figshare_url']
            s['complete'] = True
            s['status'] = 'published'
            s['target_id'] = str(s.pop('_id'))
            Submission().save_record(dict(), **s)
        else:
            # save accessions to the mongo submission record
            s = Submission().get_record(sub_id)
            s['accession'] = article_id
            s['complete'] = True
            s['status'] = 'not published'
            s['target_id'] = str(s.pop('_id'))
            Submission().save_record(dict(), **s)

    # mark submission as complete
    Submission().mark_submission_complete(sub_id)
def _do_aspera_transfer(self, transfer_token=None, user_name=None, password=None, remote_path=None,
                        file_path=None, path2library=None, sub_id=None):
    # check submission status
    submission_status = Submission().isComplete(sub_id)

    if not submission_status or submission_status == 'false':
        lg.log('Starting aspera transfer', level=Loglvl.INFO, type=Logtype.FILE)

        kwargs = dict(target_id=sub_id, commenced_on=str(datetime.now()))
        Submission().save_record(dict(), **kwargs)

        # k is a loop counter which keeps track of the number of files transferred
        k = -1
        f_str = str()
        for f in file_path:
            f_str = f_str + ' ' + f

        cmd = "./ascp -d -QT -l300M -L- {f_str!s} {user_name!s}:{remote_path!s}".format(**locals())
        lg.log(cmd, level=Loglvl.INFO, type=Logtype.FILE)
        os.chdir(path2library)

        try:
            thread = pexpect.spawn(cmd, timeout=None)
            thread.expect(["assword:", pexpect.EOF])
            thread.sendline(password)

            cpl = thread.compile_pattern_list([pexpect.EOF, '(.+)'])

            while True:
                i = thread.expect_list(cpl, timeout=None)
                if i == 0:
                    # EOF! possible error point if encountered before transfer completion
                    print("Process termination - check exit status!")
                    break
                elif i == 1:
                    pexp_match = thread.match.group(1)
                    prev_file = ''
                    tokens_to_match = ["Mb/s"]
                    units_to_match = ["KB", "MB"]
                    time_units = ['d', 'h', 'm', 's']
                    end_of_transfer = False

                    if all(tm in pexp_match.decode("utf-8") for tm in tokens_to_match):
                        fields = {
                            "transfer_status": "transferring",
                            "current_time": datetime.now().strftime("%d-%m-%Y %H:%M:%S")
                        }

                        tokens = pexp_match.decode("utf-8").split(" ")
                        for token in tokens:
                            if not token == '':
                                if "file" in token:
                                    fields['file_path'] = token.split('=')[-1]
                                    if prev_file != fields['file_path']:
                                        k = k + 1
                                    prev_file = fields['file_path']
                                elif '%' in token:
                                    pct = float(token.rstrip("%"))
                                    fields['pct_completed'] = pct
                                    # flag end of transfer
                                    print(str(transfer_token) + ": " + str(pct) + '% transferred')
                                    if pct == 100:
                                        end_of_transfer = True
                                elif any(um in token for um in units_to_match):
                                    fields['amt_transferred'] = token
                                elif "Mb/s" in token or "Mbps" in token:
                                    t = token[:-4]
                                    if '=' in t:
                                        fields['transfer_rate'] = t[t.find('=') + 1:]
                                    else:
                                        fields['transfer_rate'] = t
                                elif "status" in token:
                                    fields['transfer_status'] = token.split('=')[-1]
                                elif "rate" in token:
                                    fields['transfer_rate'] = token.split('=')[-1]
                                elif "elapsed" in token:
                                    fields['elapsed_time'] = token.split('=')[-1]
                                elif "loss" in token:
                                    fields['bytes_lost'] = token.split('=')[-1]
                                elif "size" in token:
                                    fields['file_size_bytes'] = token.split('=')[-1]
                                elif "ETA" in token:
                                    eta = tokens[-2]
                                    estimated_completion = ""
                                    eta_split = eta.split(":")
                                    t_u = time_units[-len(eta_split):]
                                    for indx, eta_token in enumerate(eta.split(":")):
                                        if eta_token == "00":
                                            continue
                                        estimated_completion += eta_token + t_u[indx] + " "
                                    fields['estimated_completion'] = estimated_completion

                        RemoteDataFile().update_transfer(transfer_token, fields)

            kwargs = dict(target_id=sub_id, completed_on=datetime.now())
            Submission().save_record(dict(), **kwargs)

            # close thread
            thread.close()
            lg.log('Aspera transfer completed', level=Loglvl.INFO, type=Logtype.FILE)

        except OSError:
            return redirect('web.apps.web_copo.views.goto_error', request=HttpRequest(),
                            message='There appears to be an issue with EBI.')

        # set up paths for conversion directories
        conv_dir = os.path.join(self._dir, sub_id)
        if not os.path.exists(os.path.join(conv_dir, 'json')):
            os.makedirs(os.path.join(conv_dir, 'json'))
        json_file_path = os.path.join(conv_dir, 'json', 'isa_json.json')
        xml_dir = conv_dir
        xml_path = os.path.join(xml_dir, 'run_set.xml')

        # convert COPO JSON to ISA JSON
        lg.log('Obtaining ISA-JSON', level=Loglvl.INFO, type=Logtype.FILE)
        conv = cnv.Investigation(submission_token=sub_id)
        meta = conv.get_schema()
        json_file = open(json_file_path, '+w')
        # dump metadata to output file
        json_file.write(dumps(meta))
        json_file.close()

        # validate ISA-JSON
        lg.log('Validating ISA-JSON', level=Loglvl.INFO, type=Logtype.FILE)
        with open(json_file_path) as json_file:
            v = isajson.validate(json_file)
        lg.log(v, level=Loglvl.INFO, type=Logtype.FILE)

        # convert to SRA with the isatools converter
        lg.log('Converting to SRA', level=Loglvl.INFO, type=Logtype.FILE)
        sra_settings = d_utils.json_to_pytype(SRA_SETTINGS).get("properties", dict())
        datafilehashes = conv.get_datafilehashes()
        json2sra.convert2(json_fp=open(json_file_path), path=conv_dir, sra_settings=sra_settings,
                          datafilehashes=datafilehashes, validate_first=False)

        # finally submit to SRA
        lg.log('Submitting XMLs to ENA via CURL', level=Loglvl.INFO, type=Logtype.FILE)
        submission_file = os.path.join(xml_dir, 'submission.xml')
        project_file = os.path.join(xml_dir, 'project_set.xml')
        sample_file = os.path.join(xml_dir, 'sample_set.xml')
        experiment_file = os.path.join(xml_dir, 'experiment_set.xml')
        run_file = os.path.join(xml_dir, 'run_set.xml')

        curl_cmd = 'curl -k -F "SUBMISSION=@' + submission_file + '"' \
                   ' -F "PROJECT=@' + os.path.join(remote_path, project_file) + '"' \
                   ' -F "SAMPLE=@' + os.path.join(remote_path, sample_file) + '"' \
                   ' -F "EXPERIMENT=@' + os.path.join(remote_path, experiment_file) + '"' \
                   ' -F "RUN=@' + os.path.join(remote_path, run_file) + '"' \
                   ' "https://www-test.ebi.ac.uk/ena/submit/drop-box/submit/?auth=ENA%20Webin-39233%20Apple123"'

        output = subprocess.check_output(curl_cmd, shell=True)
        lg.log(output, level=Loglvl.INFO, type=Logtype.FILE)

        lg.log("Extracting fields from receipt", level=Loglvl.INFO, type=Logtype.FILE)
        xml = ET.fromstring(output)

        accessions = dict()

        # get project accessions
        project = xml.find('./PROJECT')
        project_accession = project.get('accession', default='undefined')
        project_alias = project.get('alias', default='undefined')
        accessions['project'] = {'accession': project_accession, 'alias': project_alias}

        # get experiment accessions
        experiment = xml.find('./EXPERIMENT')
        experiment_accession = experiment.get('accession', default='undefined')
        experiment_alias = experiment.get('alias', default='undefined')
        accessions['experiment'] = {'accession': experiment_accession, 'alias': experiment_alias}

        # get submission accessions
        submission = xml.find('./SUBMISSION')
        submission_accession = submission.get('accession', default='undefined')
        submission_alias = submission.get('alias', default='undefined')
        accessions['submission'] = {'accession': submission_accession, 'alias': submission_alias}

        # get run accessions
        run = xml.find('./RUN')
        run_accession = run.get('accession', default='undefined')
        run_alias = run.get('alias', default='undefined')
        accessions['run'] = {'accession': run_accession, 'alias': run_alias}

        # get sample accessions
        samples = xml.findall('./SAMPLE')
        sample_accessions = list()
        for sample in samples:
            sample_accession = sample.get('accession', default='undefined')
            sample_alias = sample.get('alias', default='undefined')
            s = {'sample_accession': sample_accession, 'sample_alias': sample_alias}
            for bio_s in sample:
                s['biosample_accession'] = bio_s.get('accession', default='undefined')
            sample_accessions.append(s)
        accessions['sample'] = sample_accessions

        # save accessions to the mongo submission record
        s = Submission().get_record(sub_id)
        s['accessions'] = accessions
        s['complete'] = True
        s['target_id'] = str(s.pop('_id'))
        Submission().save_record(dict(), **s)