def add_new_rows_to_table(df, replace_table=False, dry_run=False):
    """Add rows for synapse IDs not already represented in the table, or replace the whole table"""
    schema = syn.get(TABLE_SYNAPSE_ID)

    if replace_table:
        ## delete previous entries in pilot-63-progress table
        results = syn.tableQuery('select * from %s' % utils.id_of(schema), resultsAs='rowset')
        if not dry_run:
            syn.delete(results)
    else:
        results = syn.tableQuery('select synapse_id from %s' % utils.id_of(schema),
                                 includeRowIdAndRowVersion=False)
        synapse_ids = [row[0] for row in results]
        df = df[[synapse_id not in synapse_ids for synapse_id in df['synapse_id']]]

    if df.shape[0] > 0:
        if dry_run:
            print "Dry run: would have added %d rows to pilot-63-progress table" % df.shape[0]
        else:
            print "Adding %d rows to pilot-63-progress table" % df.shape[0]
            syn.store(Table(schema, df))
        return df.shape[0]
    else:
        print "No new rows for pilot-63-progress table"
        return None
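## Usage sketch (hypothetical data): `df` is assumed to be a pandas DataFrame
## with a 'synapse_id' column; TABLE_SYNAPSE_ID and a logged-in `syn` client
## are assumed module-level state, as in the function above.
import pandas as pd

df = pd.DataFrame({'synapse_id': ['syn111', 'syn222'], 'status': ['done', 'pending']})
n_added = add_new_rows_to_table(df, dry_run=True)  # preview only; stores nothing
if n_added:
    add_new_rows_to_table(df)                      # actually append the new rows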
def create_challenge_object(project, participants_team):
    challenge_json = {
        'participantTeamId': utils.id_of(participants_team),
        'projectId': utils.id_of(project)
    }
    return DictObject(**syn.restPOST("/challenge", body=json.dumps(challenge_json)))
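## Usage sketch (hypothetical IDs): tie a project to its participant team by
## POSTing a challenge object through the Synapse REST API.
project = syn.get("syn1234567")                  # hypothetical project ID
team = syn.getTeam("My Challenge Participants")  # hypothetical team name
challenge = create_challenge_object(project, team)
print(challenge.id)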
def archive(evaluation, destination=None, name=None, query=None):
    """
    Archive the submissions for the given evaluation queue and store them in the destination synapse folder.

    :param evaluation: a synapse evaluation queue or its ID
    :param destination: a synapse folder or its ID
    :param query: a query that will return the desired submissions. At least the ID must be returned.
                  Defaults to _select * from evaluation_[EVAL_ID] where status=="VALIDATED"_.
    """
    challenge = {'5877348': 'FusionDetection', '5952651': 'IsoformQuantification'}
    if not query:
        query = 'select * from evaluation_%s where status=="VALIDATED"' % utils.id_of(evaluation)
    path = challenge[utils.id_of(evaluation)]

    ## for each submission, download its associated file and write a line of metadata
    results = Query(query=query)
    if 'objectId' not in results.headers:
        raise ValueError("Can't find the required field \"objectId\" in the results of the query: \"{0}\"".format(query))

    for result in results:
        # Check if the folder has already been created in synapse
        # (This is used as a tool to check submissions that have already been cached)
        submissionId = result[results.headers.index('objectId')]
        check = syn.query('select id,name from folder where parentId == "%s" and name == "%s"' % (destination, submissionId))
        if check['totalNumberOfResults'] == 0:
            os.mkdir(submissionId)
            submission_parent = syn.store(Folder(submissionId, parent=destination))
            submission = syn.getSubmission(submissionId, downloadLocation=submissionId)
            newFilePath = submission.filePath.replace(' ', '_')
            shutil.move(submission.filePath, newFilePath)
            # Store CWL file in bucket
            os.system('gsutil cp -R %s gs://smc-rna-cache/%s' % (submissionId, path))
            with open(newFilePath, "r") as cwlfile:
                docs = yaml.load(cwlfile)
                merged = docs['$graph']
                docker = []
                for tools in merged:
                    if tools['class'] == 'CommandLineTool':
                        if tools.get('requirements', None) is not None:
                            for i in tools['requirements']:
                                if i.get('dockerPull', None) is not None:
                                    docker.append(i['dockerPull'])
                    if tools['class'] == 'Workflow':
                        hints = tools.get("hints", None)
                        if hints is not None:
                            for i in tools['hints']:
                                if os.path.basename(i['class']) == "synData":
                                    temp = syn.get(i['entity'])
                                    # Store index files
                                    os.system('gsutil cp %s gs://smc-rna-cache/%s/%s' % (temp.path, path, submissionId))
                                    os.system('rm -rf ~/.synapseCache/*')
            # Pull, save, and store docker containers
            docker = set(docker)
            for i in docker:
                os.system('sudo docker pull %s' % i)
                os.system('sudo docker save -o %s.tar %s' % (os.path.basename(i), i))
                os.system('sudo chmod a+r %s.tar' % os.path.basename(i))
                os.system('gsutil cp %s.tar gs://smc-rna-cache/%s/%s' % (os.path.basename(i), path, submissionId))
                os.remove("%s.tar" % os.path.basename(i))
            os.system('rm -rf %s' % submissionId)
def archive(evaluation, destination=None, name=None, query=None):
    """
    Archive the submissions for the given evaluation queue and store them in the destination synapse folder.

    :param evaluation: a synapse evaluation queue or its ID
    :param destination: a synapse folder or its ID
    :param query: a query that will return the desired submissions. At least the ID must be returned.
                  Defaults to _select * from evaluation_[EVAL_ID] where status=="SCORED"_.
    """
    tempdir = tempfile.mkdtemp()
    archive_dirname = 'submissions_%s' % utils.id_of(evaluation)

    if not query:
        query = 'select * from evaluation_%s where status=="SCORED"' % utils.id_of(evaluation)

    ## for each submission, download its associated file and write a line of metadata
    results = Query(query=query)

    if not name:
        name = 'submissions_%s.tgz' % utils.id_of(evaluation)
    tar_path = os.path.join(tempdir, name)
    metadata_file_path = os.path.join(tempdir, 'submission_metadata.csv')

    print "creating tar at:", tar_path

    ## for each submission, we add a file to the tar and a row
    ## to the metadata .csv file
    with tarfile.open(tar_path, mode='w:gz') as archive:
        with open(metadata_file_path, 'w') as f:
            ## write header row to .csv file
            header = ','.join(results.headers)
            print header
            f.write(header + '\n')
            ## add submissions to archive and write rows to .csv file
            for result in results:
                ## retrieve file into cache and copy it to destination
                submission = syn.getSubmission(result[results.headers.index('objectId')])
                archive.add(submission.filePath,
                            arcname=os.path.join(archive_dirname,
                                                 submission.id + "_" + os.path.basename(submission.filePath)))
                line = (','.join(unicode(item) for item in result)).encode('utf-8')
                print line
                f.write(line + '\n')
        ## add metadata .csv file to the tar
        archive.add(name=metadata_file_path,
                    arcname=os.path.join(archive_dirname, 'submission_metadata.csv'))

    entity = syn.store(File(tar_path, parent=destination), evaluation_id=utils.id_of(evaluation))
    print "created:", entity.id, entity.name
    return entity.id
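## Usage sketch (hypothetical IDs): bundle all SCORED submissions of a queue
## into a tarball and store it in an archive folder.
tar_entity_id = archive('9614112', destination='syn7654321')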
def archive(evaluation, destination=None, name=None, query=None):
    """
    Archive the submissions for the given evaluation queue and store them in the destination synapse folder.

    :param evaluation: a synapse evaluation queue or its ID
    :param destination: a synapse folder or its ID
    :param query: a query that will return the desired submissions. At least the ID must be returned.
                  Defaults to _select * from evaluation_[EVAL_ID] where status=="SCORED"_.
    """
    tempdir = tempfile.mkdtemp()
    archive_dirname = 'submissions_%s' % utils.id_of(evaluation)

    if not query:
        query = 'select * from evaluation_%s where status=="SCORED"' % utils.id_of(evaluation)

    ## for each submission, download its associated file and write a line of metadata
    results = Query(query=query)
    if 'objectId' not in results.headers:
        raise ValueError("Can't find the required field \"objectId\" in the results of the query: \"{0}\"".format(query))

    if not name:
        name = 'submissions_%s.tgz' % utils.id_of(evaluation)
    tar_path = os.path.join(tempdir, name)

    print "creating tar at:", tar_path
    print results.headers

    with tarfile.open(tar_path, mode='w:gz') as archive:
        with open(os.path.join(tempdir, 'submission_metadata.csv'), 'w') as f:
            f.write((','.join(hdr for hdr in (results.headers + ['filename'])) + '\n').encode('utf-8'))
            for result in results:
                ## retrieve file into cache and copy it to destination
                submission = syn.getSubmission(result[results.headers.index('objectId')])
                prefixed_filename = submission.id + "_" + os.path.basename(submission.filePath)
                archive.add(submission.filePath, arcname=os.path.join(archive_dirname, prefixed_filename))
                line = (','.join(unicode(item) for item in (result + [prefixed_filename]))).encode('utf-8')
                print line
                f.write(line + '\n')
        archive.add(name=os.path.join(tempdir, 'submission_metadata.csv'),
                    arcname=os.path.join(archive_dirname, 'submission_metadata.csv'))

    entity = syn.store(File(tar_path, parent=destination), evaluation_id=utils.id_of(evaluation))
    print "created:", entity.id, entity.name
    return entity.id
def tableUpdateWhere(tableSchema, whereClause, setDict):
    """The UPDATE statement is used to update existing rows in a table."""
    from synapseclient.table import Table
    from synapseclient.utils import id_of

    table_id = id_of(tableSchema)
    query = 'select %s from %s where %s' % (','.join(setDict.keys()), table_id, whereClause)
    df = syn.tableQuery(query).asDataFrame()
    for key, value in setDict.items():
        df[key] = value
    # store the updated DataFrame back to the table; the query result's row IDs
    # and versions (carried in the DataFrame index) make this an in-place update
    # rather than an append
    return syn.store(Table(table_id, df))
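## Usage sketch (hypothetical table and columns): mark every matching row as
## obsolete in one pass.
schema = syn.get("syn2222222")  # hypothetical table schema ID
tableUpdateWhere(schema, "status='active' and year < 2015", {'status': 'obsolete'})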
def archive(evaluation, destination=None, name=None, query=None):
    """
    Archive the submissions for the given evaluation queue and store them in the destination synapse folder.

    :param evaluation: a synapse evaluation queue or its ID
    :param destination: a synapse folder or its ID
    :param query: a query that will return the desired submissions. At least the ID must be returned.
                  Defaults to _select * from evaluation_[EVAL_ID] where status=="SCORED"_.
    """
    tempdir = tempfile.mkdtemp()
    archive_dirname = 'submissions_%s' % utils.id_of(evaluation)

    if not query:
        query = 'select * from evaluation_%s where status=="SCORED"' % utils.id_of(evaluation)

    ## for each submission, download its associated file and write a line of metadata
    results = Query(query=query)

    if not name:
        name = 'submissions_%s.tgz' % utils.id_of(evaluation)
    tar_path = os.path.join(tempdir, name)
    metadata_file_path = os.path.join(tempdir, 'submission_metadata.csv')

    print "creating tar at:", tar_path

    ## for each submission, we add a file to the tar and a row
    ## to the metadata .csv file
    with tarfile.open(tar_path, mode='w:gz') as archive:
        with open(metadata_file_path, 'w') as f:
            ## write header row to .csv file
            header = ','.join(results.headers)
            print header
            f.write(header + '\n')
            ## add submissions to archive and write rows to .csv file
            for result in results:
                ## retrieve file into cache and copy it to destination
                submission = syn.getSubmission(result[results.headers.index('objectId')])
                archive.add(submission.filePath,
                            arcname=os.path.join(archive_dirname,
                                                 submission.id + "_" + os.path.basename(submission.filePath)))
                line = (','.join(unicode(item) for item in result)).encode('utf-8')
                print line
                f.write(line + '\n')
        ## add metadata .csv file to the tar
        archive.add(name=metadata_file_path,
                    arcname=os.path.join(archive_dirname, 'submission_metadata.csv'))

    entity = syn.store(File(tar_path, parent=destination), evaluation_id=utils.id_of(evaluation))
    print "created:", entity.id, entity.name
    return entity.id
def test_id_of():
    assert utils.id_of(1) == "1"
    assert utils.id_of("syn12345") == "syn12345"
    assert utils.id_of({"foo": 1, "id": 123}) == 123
    assert_raises(ValueError, utils.id_of, {"foo": 1, "idzz": 123})
    assert utils.id_of({"properties": {"id": 123}}) == 123
    assert_raises(ValueError, utils.id_of, {"properties": {"qq": 123}})
    assert_raises(ValueError, utils.id_of, object())

    class Foo:
        def __init__(self, id):
            self.properties = {"id": id}

    foo = Foo(123)
    assert utils.id_of(foo) == 123
def test_id_of():
    assert utils.id_of(1) == '1'
    assert utils.id_of('syn12345') == 'syn12345'
    assert utils.id_of({'foo': 1, 'id': 123}) == 123
    assert_raises(ValueError, utils.id_of, {'foo': 1, 'idzz': 123})
    assert utils.id_of({'properties': {'id': 123}}) == 123
    assert_raises(ValueError, utils.id_of, {'properties': {'qq': 123}})
    assert_raises(ValueError, utils.id_of, object())

    class Foo:
        def __init__(self, id):
            self.properties = {'id': id}

    foo = Foo(123)
    assert utils.id_of(foo) == 123
def __init__(self, name=None, columns=None, parent=None, properties=None, annotations=None, local_state=None, **kwargs):
    self.properties.setdefault('columnIds', [])
    if name:
        kwargs['name'] = name
    if columns:
        for column in columns:
            if isinstance(column, basestring) or isinstance(column, int) or hasattr(column, 'id'):
                kwargs.setdefault('columnIds', []).append(utils.id_of(column))
            elif isinstance(column, Column):
                kwargs.setdefault('columns_to_store', []).append(column)
            else:
                raise ValueError("Not a column? %s" % unicode(column))
    super(Schema, self).__init__(concreteType=Schema._synapse_entity_type,
                                 properties=properties, annotations=annotations,
                                 local_state=local_state, parent=parent, **kwargs)
def usedEntity(self, target, targetVersion=None, wasExecuted=False):
    """
    TODO_Sphinx

    :param target:        either a synapse entity or entity id (as a string)
    :param targetVersion: optionally specify the version of the entity
    :param wasExecuted:   boolean indicating whether the entity represents code
                          that was executed to produce the result
    """
    reference = {'targetId': id_of(target)}
    if targetVersion:
        reference['targetVersionNumber'] = int(targetVersion)
    else:
        try:
            # if we have an Entity, get its version number
            reference['targetVersionNumber'] = target['versionNumber']
        except (KeyError, TypeError):
            # count on the platform to get the current version of the entity from Synapse
            pass
    self['used'].append({'reference': reference,
                         'wasExecuted': wasExecuted,
                         'concreteType': 'org.sagebionetworks.repo.model.provenance.UsedEntity'})
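## Usage sketch: record provenance on an Activity, assuming this method is
## defined on the Activity class as above (IDs are hypothetical).
act = Activity(name='scoring run')
act.usedEntity('syn1234', targetVersion=2)   # a data input, pinned to version 2
act.usedEntity('syn9999', wasExecuted=True)  # the code that produced the result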
def create_supertable_leaderboard(evaluation, leaderboard_columns):
    """
    Create the leaderboard using a supertable, a markdown extension that dynamically
    builds a table by querying submissions. Because the supertable re-queries whenever
    the page is rendered, this step only has to be done once.
    """
    uri_base = urllib.quote_plus("/evaluation/submission/query")
    # it's incredibly picky that the equals sign here has to be urlencoded, but
    # the later equals signs CAN'T be urlencoded.
    query = urllib.quote_plus('query=select * from evaluation_%s where status=="SCORED"' % utils.id_of(evaluation))
    params = [('paging', 'true'),
              ('queryTableResults', 'true'),
              ('showIfLoggedInOnly', 'false'),
              ('pageSize', '25'),
              ('showRowNumber', 'false'),
              ('jsonResultsKeyName', 'rows')]

    # Column specifications have 4 fields: renderer, display name, column name, sort.
    # Renderer and sort are usually 'none' and 'NONE'.
    for i, column in enumerate(leaderboard_columns):
        fields = {'renderer': 'none', 'sort': 'NONE'}
        fields.update(column)
        if 'display_name' not in fields:
            fields['display_name'] = fields['name']
        params.append(('columnConfig%s' % i, "{renderer},{display_name},{name};,{sort}".format(**fields)))

    return "${supertable?path=" + uri_base + "%3F" + query + "&" + \
           "&".join([key + "=" + urllib.quote_plus(value) for key, value in params]) + "}"
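## Usage sketch (hypothetical queue ID and columns): build the supertable
## widget markup, then paste or store it in the challenge wiki page.
leaderboard_columns = [
    {'name': 'objectId', 'display_name': 'ID'},
    {'name': 'name', 'display_name': 'Name'},
    {'name': 'score', 'display_name': 'Score', 'sort': 'DESC'},
]
markup = create_supertable_leaderboard('9614112', leaderboard_columns)
print(markup)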
def list_evaluations(project):
    print '\n\nEvaluations for project: ', utils.id_of(project)
    print '-' * 60

    evaluations = syn.getEvaluationByContentSource(project)
    for evaluation in evaluations:
        print "Evaluation: %s" % evaluation.id, evaluation.name.encode('utf-8')
def __init__(self, properties=None, annotations=None, local_state=None, parent=None, **kwargs):
    if properties:
        if isinstance(properties, collections.Mapping):
            if 'annotations' in properties and isinstance(properties['annotations'], collections.Mapping):
                annotations.update(properties['annotations'])
                del properties['annotations']
            self.__dict__['properties'].update(properties)
        else:
            raise Exception('Unknown argument type: properties is a %s' % str(type(properties)))

    if annotations:
        if isinstance(annotations, collections.Mapping):
            self.__dict__['annotations'].update(annotations)
        elif isinstance(annotations, basestring):
            self.properties['annotations'] = annotations
        else:
            raise Exception('Unknown argument type: annotations is a %s' % str(type(annotations)))

    if local_state:
        if isinstance(local_state, collections.Mapping):
            self.local_state(local_state)
        else:
            raise Exception('Unknown argument type: local_state is a %s' % str(type(local_state)))

    for key in self.__class__._local_keys:
        if key not in self.__dict__:
            self.__dict__[key] = None

    # Extract parentId from parent
    if 'parentId' not in kwargs:
        try:
            if parent:
                kwargs['parentId'] = id_of(parent)
        except Exception:
            if parent and isinstance(parent, Entity) and 'id' not in parent:
                raise Exception('Couldn\'t find \'id\' of parent. Has it been stored in Synapse?')
            else:
                raise Exception('Couldn\'t find \'id\' of parent.')

    # Note: this will work properly if derived classes declare their
    # internal state variable *before* invoking super(...).__init__(...)
    for key, value in kwargs.items():
        self.__setitem__(key, value)

    if 'entityType' not in self:
        self['entityType'] = self.__class__._synapse_entity_type
def test_id_of():
    assert_equals(utils.id_of(1), '1')
    assert_equals(utils.id_of('syn12345'), 'syn12345')
    assert_equals(utils.id_of({'foo': 1, 'id': 123}), '123')
    assert_raises(ValueError, utils.id_of, {'foo': 1, 'idzz': 123})
    assert_equals(utils.id_of({'properties': {'id': 123}}), '123')
    assert_raises(ValueError, utils.id_of, {'properties': {'qq': 123}})
    assert_raises(ValueError, utils.id_of, object())

    class Foo:
        def __init__(self, id_attr_name, id):
            self.properties = {id_attr_name: id}

    id_attr_names = ['id', 'ownerId', 'tableId']
    for attr_name in id_attr_names:
        foo = Foo(attr_name, 123)
        assert_equals(utils.id_of(foo), '123')
def removeColumn(self, column):
    """
    :param column: a column object or its ID
    """
    if isinstance(column, basestring) or isinstance(column, int) or hasattr(column, 'id'):
        self.properties.columnIds.remove(utils.id_of(column))
    elif isinstance(column, Column) and self.columns_to_store:
        self.columns_to_store.remove(column)
    else:
        raise ValueError("Can't remove column %s" % unicode(column))
def archive(evaluation, destination=None, name=None, query=None):
    """
    Archive the submissions for the given evaluation queue and store them in the destination synapse folder.

    :param evaluation: a synapse evaluation queue or its ID
    :param destination: a synapse folder or its ID
    :param query: a query that will return the desired submissions. At least the ID must be returned.
                  Defaults to _select * from evaluation_[EVAL_ID] where status=="SCORED"_.
    """
    tempdir = tempfile.mkdtemp()
    archive_dirname = "submissions_%s" % utils.id_of(evaluation)

    if not query:
        query = 'select * from evaluation_%s where status=="SCORED"' % utils.id_of(evaluation)

    ## for each submission, download its associated file and write a line of metadata
    results = Query(query=query)
    if "objectId" not in results.headers:
        raise ValueError('Can\'t find the required field "objectId" in the results of the query: "{0}"'.format(query))

    if not name:
        name = "submissions_%s.tgz" % utils.id_of(evaluation)
    tar_path = os.path.join(tempdir, name)

    print "creating tar at:", tar_path
    print results.headers

    with tarfile.open(tar_path, mode="w:gz") as archive:
        with open(os.path.join(tempdir, "submission_metadata.csv"), "w") as f:
            f.write((",".join(hdr for hdr in (results.headers + ["filename"])) + "\n").encode("utf-8"))
            for result in results:
                ## retrieve file into cache and copy it to destination
                submission = syn.getSubmission(result[results.headers.index("objectId")])
                prefixed_filename = submission.id + "_" + os.path.basename(submission.filePath)
                archive.add(submission.filePath, arcname=os.path.join(archive_dirname, prefixed_filename))
                line = (",".join(unicode(item) for item in (result + [prefixed_filename]))).encode("utf-8")
                print line
                f.write(line + "\n")
        archive.add(
            name=os.path.join(tempdir, "submission_metadata.csv"),
            arcname=os.path.join(archive_dirname, "submission_metadata.csv"),
        )

    entity = syn.store(File(tar_path, parent=destination), evaluation_id=utils.id_of(evaluation))
    print "created:", entity.id, entity.name
    return entity.id
def __init__(self, **kwargs):
    # Verify that the parameters are correct
    if 'owner' not in kwargs:
        sys.stderr.write('Wiki constructor must have an owner specified')
        raise ValueError

    super(Wiki, self).__init__(kwargs)
    self.ownerType = guess_object_type(self.owner)
    self.ownerId = id_of(self.owner)
    del self['owner']
def __init__(self, properties=None, annotations=None, local_state=None, parent=None, **kwargs):
    if properties:
        if isinstance(properties, collections.Mapping):
            if "annotations" in properties and isinstance(properties["annotations"], collections.Mapping):
                annotations.update(properties["annotations"])
                del properties["annotations"]
            self.__dict__["properties"].update(properties)
        else:
            raise SynapseMalformedEntityError("Unknown argument type: properties is a %s" % str(type(properties)))

    if annotations:
        if isinstance(annotations, collections.Mapping):
            self.__dict__["annotations"].update(annotations)
        elif isinstance(annotations, str):
            self.properties["annotations"] = annotations
        else:
            raise SynapseMalformedEntityError("Unknown argument type: annotations is a %s" % str(type(annotations)))

    if local_state:
        if isinstance(local_state, collections.Mapping):
            self.local_state(local_state)
        else:
            raise SynapseMalformedEntityError("Unknown argument type: local_state is a %s" % str(type(local_state)))

    for key in self.__class__._local_keys:
        if key not in self.__dict__:
            self.__dict__[key] = None

    # Extract parentId from parent
    if "parentId" not in kwargs:
        if parent:
            try:
                kwargs["parentId"] = id_of(parent)
            except Exception:
                if isinstance(parent, Entity) and "id" not in parent:
                    raise SynapseMalformedEntityError("Couldn't find 'id' of parent. Has it been stored in Synapse?")
                else:
                    raise SynapseMalformedEntityError("Couldn't find 'id' of parent.")

    # Note: this will work properly if derived classes declare their
    # internal state variable *before* invoking super(...).__init__(...)
    for key, value in six.iteritems(kwargs):
        self.__setitem__(key, value)

    if "concreteType" not in self:
        self["concreteType"] = self.__class__._synapse_entity_type

    ## Only project can be top-level. All other entity types require parentId;
    ## don't enforce this for generic Entity
    if "parentId" not in self and not isinstance(self, Project) and not type(self) == Entity:
        raise SynapseMalformedEntityError("Entities of type %s must have a parentId." % type(self))
def removeColumn(self, column):
    """
    :param column: a column object or its ID
    """
    if isinstance(column, basestring) or isinstance(column, int) or hasattr(column, 'id'):
        self.properties.columnIds.remove(utils.id_of(column))
    elif isinstance(column, Column) and self.columns_to_store:
        self.columns_to_store.remove(column)
    else:
        raise ValueError("Can't remove column %s" % unicode(column))
def addColumn(self, column):
    """
    :param column: a column object or its ID
    """
    if isinstance(column, basestring) or isinstance(column, int) or hasattr(column, 'id'):
        self.properties.columnIds.append(utils.id_of(column))
    elif isinstance(column, Column):
        if not self.__dict__.get('columns_to_store', None):
            self.__dict__['columns_to_store'] = []
        self.__dict__['columns_to_store'].append(column)
    else:
        raise ValueError("Not a column? %s" % unicode(column))
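## Usage sketch (hypothetical IDs): adjust a schema's columns before storing.
schema = syn.get("syn3333333")  # hypothetical table schema
schema.addColumn("12345")       # attach an existing column by ID
schema.addColumn(Column(name='notes', columnType='STRING', maximumSize=50))
schema.removeColumn("67890")    # detach a previously attached column
schema = syn.store(schema)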
def test_store_table_datetime():
    current_datetime = datetime.fromtimestamp(round(time.time(), 3))
    schema = syn.store(Schema("testTable", [Column(name="testerino", columnType='DATE')], project))
    rowset = RowSet(rows=[Row([current_datetime])], schema=schema)
    rowset_table = syn.store(Table(schema, rowset))

    query_result = syn.tableQuery("select * from %s" % id_of(schema), resultsAs="rowset")
    assert_equals(current_datetime, query_result.rowset['rows'][0]['values'][0])
def syncFromSynapse(syn, entity, path=None, ifcollision='overwrite.local', allFiles=None):
    """Synchronizes all the files in a folder (including subfolders) from Synapse.

    :param syn:         A synapse object as obtained with syn = synapseclient.login()
    :param entity:      A Synapse ID, a Synapse Entity object of type folder or project.
    :param path:        An optional path where the file hierarchy will be reproduced.
                        If not specified the files will by default be placed in the synapseCache.
    :param ifcollision: Determines how to handle file collisions.
                        May be "overwrite.local", "keep.local", or "keep.both".
                        Defaults to "overwrite.local".

    :returns: list of entities (files, tables, links)

    This function will crawl all subfolders of the project/folder specified by `entity` and download
    all files that have not already been downloaded. If there are newer files in Synapse (or a local
    file has been edited outside of the cache) since the last download then the local file will be
    replaced by the new file unless ifcollision is changed.

    Example::

        Download and print the paths of all downloaded files::

            entities = syncFromSynapse(syn, "syn1234")
            for f in entities:
                print(f.path)
    """
    if allFiles is None:
        allFiles = list()
    id = id_of(entity)
    results = syn.chunkedQuery("select id, name, nodeType from entity where entity.parentId=='%s'" % id)
    for result in results:
        if is_container(result):
            if path is not None:  # If we are downloading outside cache create directory.
                new_path = os.path.join(path, result['entity.name'])
                try:
                    os.mkdir(new_path)
                except OSError as err:
                    if err.errno != errno.EEXIST:
                        raise
                print('making dir', new_path)
            else:
                new_path = None
            syncFromSynapse(syn, result['entity.id'], new_path, ifcollision, allFiles)
        else:
            ent = syn.get(result['entity.id'], downloadLocation=path, ifcollision=ifcollision)
            allFiles.append(ent)
    return allFiles
def __init__(self, properties=None, annotations=None, local_state=None, parent=None, **kwargs):
    if properties:
        if isinstance(properties, collections.Mapping):
            if 'annotations' in properties and isinstance(properties['annotations'], collections.Mapping):
                annotations.update(properties['annotations'])
                del properties['annotations']
            self.__dict__['properties'].update(properties)
        else:
            raise SynapseMalformedEntityError('Unknown argument type: properties is a %s' % str(type(properties)))

    if annotations:
        if isinstance(annotations, collections.Mapping):
            self.__dict__['annotations'].update(annotations)
        elif isinstance(annotations, basestring):
            self.properties['annotations'] = annotations
        else:
            raise SynapseMalformedEntityError('Unknown argument type: annotations is a %s' % str(type(annotations)))

    if local_state:
        if isinstance(local_state, collections.Mapping):
            self.local_state(local_state)
        else:
            raise SynapseMalformedEntityError('Unknown argument type: local_state is a %s' % str(type(local_state)))

    for key in self.__class__._local_keys:
        if key not in self.__dict__:
            self.__dict__[key] = None

    # Extract parentId from parent
    if 'parentId' not in kwargs:
        if parent:
            try:
                kwargs['parentId'] = id_of(parent)
            except Exception:
                if isinstance(parent, Entity) and 'id' not in parent:
                    raise SynapseMalformedEntityError("Couldn't find 'id' of parent. Has it been stored in Synapse?")
                else:
                    raise SynapseMalformedEntityError("Couldn't find 'id' of parent.")

    # Note: this will work properly if derived classes declare their
    # internal state variable *before* invoking super(...).__init__(...)
    for key, value in kwargs.items():
        self.__setitem__(key, value)

    if 'concreteType' not in self:
        self['concreteType'] = self.__class__._synapse_entity_type

    ## Only project can be top-level. All other entity types require parentId;
    ## don't enforce this for generic Entity
    if 'parentId' not in self and not isinstance(self, Project) and not type(self) == Entity:
        raise SynapseMalformedEntityError("Entities of type %s must have a parentId." % type(self))
def setup(self):
    self.eval_id = '9090'
    self.contributors = None
    self.entity = {'versionNumber': 7,
                   'id': 'syn1009',
                   'etag': 'etag',
                   'name': 'entity name'}
    self.eval = {'contentSource': self.entity['id'],
                 'createdOn': '2013-11-06T06:04:26.789Z',
                 'etag': '86485ea1-8c89-4f24-a0a4-2f63bc011091',
                 'id': self.eval_id,
                 'name': 'test evaluation',
                 'ownerId': '1560252',
                 'status': 'OPEN',
                 'submissionReceiptMessage': 'Your submission has been received.!'}
    self.team = {'id': 5, 'name': 'Team Blue'}
    self.submission = {'id': 123,
                       'evaluationId': self.eval_id,
                       'name': self.entity['name'],
                       'entityId': self.entity['id'],
                       'versionNumber': self.entity['versionNumber'],
                       'teamId': id_of(self.team['id']),
                       'contributors': self.contributors,
                       'submitterAlias': self.team['name']}
    self.eligibility_hash = 23

    self.patch_private_submit = patch.object(syn, "_submit", return_value=self.submission)
    self.patch_getEvaluation = patch.object(syn, "getEvaluation", return_value=self.eval)
    self.patch_get = patch.object(syn, "get", return_value=self.entity)
    self.patch_getTeam = patch.object(syn, "getTeam", return_value=self.team)
    self.patch_get_contributors = patch.object(syn, "_get_contributors",
                                               return_value=(self.contributors, self.eligibility_hash))

    self.mock_private_submit = self.patch_private_submit.start()
    self.mock_getEvaluation = self.patch_getEvaluation.start()
    self.mock_get = self.patch_get.start()
    self.mock_getTeam = self.patch_getTeam.start()
    self.mock_get_contributors = self.patch_get_contributors.start()
def __init__(self, name=None, columns=None, parent=None, properties=None, annotations=None, local_state=None, **kwargs):
    self.properties.setdefault('columnIds', [])
    if name:
        kwargs['name'] = name
    if columns:
        for column in columns:
            if isinstance(column, basestring) or isinstance(column, int) or hasattr(column, 'id'):
                kwargs.setdefault('columnIds', []).append(utils.id_of(column))
            elif isinstance(column, Column):
                kwargs.setdefault('columns_to_store', []).append(column)
            else:
                raise ValueError("Not a column? %s" % unicode(column))
    super(Schema, self).__init__(concreteType=Schema._synapse_entity_type,
                                 properties=properties, annotations=annotations,
                                 local_state=local_state, parent=parent, **kwargs)
def addColumn(self, column):
    """
    :param column: a column object or its ID
    """
    if isinstance(column, basestring) or isinstance(column, int) or hasattr(column, 'id'):
        self.properties.columnIds.append(utils.id_of(column))
    elif isinstance(column, Column):
        if not self.__dict__.get('columns_to_store', None):
            self.__dict__['columns_to_store'] = []
        self.__dict__['columns_to_store'].append(column)
    else:
        raise ValueError("Not a column? %s" % unicode(column))
def __init__(self, columns=None, schema=None, **kwargs):
    if 'headers' not in kwargs:
        if columns:
            kwargs.setdefault('headers', []).extend([SelectColumn.from_column(column) for column in columns])
        elif schema and isinstance(schema, Schema):
            kwargs.setdefault('headers', []).extend([SelectColumn(id=id) for id in schema["columnIds"]])
    if ('tableId' not in kwargs) and schema:
        kwargs['tableId'] = utils.id_of(schema)
    if not kwargs.get('tableId', None):
        raise ValueError("Table schema ID must be defined to create a RowSet")
    if not kwargs.get('headers', None):
        raise ValueError("Column headers must be defined to create a RowSet")
    super(RowSet, self).__init__(kwargs)
def __init__(self, targetId=None, targetVersion=None, parent=None, properties=None, annotations=None, local_state=None, **kwargs):
    if targetId is not None and targetVersion is not None:
        kwargs['linksTo'] = dict(targetId=utils.id_of(targetId), targetVersionNumber=targetVersion)
    elif targetId is not None and targetVersion is None:
        kwargs['linksTo'] = dict(targetId=utils.id_of(targetId))
    elif properties is not None and 'linksTo' in properties:
        pass
    else:
        raise SynapseMalformedEntityError("Must provide a target id")
    super(Link, self).__init__(concreteType=Link._synapse_entity_type,
                               properties=properties, annotations=annotations,
                               local_state=local_state, parent=parent, **kwargs)
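## Usage sketch (hypothetical IDs): create a Link entity that points at a
## specific version of a target and lives in another container.
link = syn.store(Link(targetId='syn1234567', targetVersion=3, parent='syn7654321'))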
def setup(self):
    self.eval_id = '9090'
    self.contributors = None
    self.entity = {'versionNumber': 7,
                   'id': 'syn1009',
                   'etag': 'etag',
                   'name': 'entity name'}
    self.eval = {'contentSource': self.entity['id'],
                 'createdOn': '2013-11-06T06:04:26.789Z',
                 'etag': '86485ea1-8c89-4f24-a0a4-2f63bc011091',
                 'id': self.eval_id,
                 'name': 'test evaluation',
                 'ownerId': '1560252',
                 'status': 'OPEN',
                 'submissionReceiptMessage': 'Your submission has been received.!'}
    self.team = {'id': 5, 'name': 'Team Blue'}
    self.submission = {'id': 123,
                       'evaluationId': self.eval_id,
                       'name': self.entity['name'],
                       'entityId': self.entity['id'],
                       'versionNumber': self.entity['versionNumber'],
                       'teamId': id_of(self.team['id']),
                       'contributors': self.contributors,
                       'submitterAlias': self.team['name']}
    self.eligibility_hash = 23

    self.patch_private_submit = patch.object(syn, "_submit", return_value=self.submission)
    self.patch_getEvaluation = patch.object(syn, "getEvaluation", return_value=self.eval)
    self.patch_get = patch.object(syn, "get", return_value=self.entity)
    self.patch_getTeam = patch.object(syn, "getTeam", return_value=self.team)
    self.patch_get_contributors = patch.object(syn, "_get_contributors",
                                               return_value=(self.contributors, self.eligibility_hash))

    self.mock_private_submit = self.patch_private_submit.start()
    self.mock_getEvaluation = self.patch_getEvaluation.start()
    self.mock_get = self.patch_get.start()
    self.mock_getTeam = self.patch_getTeam.start()
    self.mock_get_contributors = self.patch_get_contributors.start()
def __init__(self, properties=None, annotations=None, local_state=None, parent=None, **kwargs):
    if properties:
        if isinstance(properties, collections.Mapping):
            if 'annotations' in properties and isinstance(properties['annotations'], collections.Mapping):
                annotations.update(properties['annotations'])
                del properties['annotations']
            self.__dict__['properties'].update(properties)
        else:
            raise SynapseMalformedEntityError('Unknown argument type: properties is a %s' % str(type(properties)))

    if annotations:
        if isinstance(annotations, collections.Mapping):
            self.__dict__['annotations'].update(annotations)
        elif isinstance(annotations, basestring):
            self.properties['annotations'] = annotations
        else:
            raise SynapseMalformedEntityError('Unknown argument type: annotations is a %s' % str(type(annotations)))

    if local_state:
        if isinstance(local_state, collections.Mapping):
            self.local_state(local_state)
        else:
            raise SynapseMalformedEntityError('Unknown argument type: local_state is a %s' % str(type(local_state)))

    for key in self.__class__._local_keys:
        if key not in self.__dict__:
            self.__dict__[key] = None

    # Extract parentId from parent
    if 'parentId' not in kwargs:
        try:
            if parent:
                kwargs['parentId'] = id_of(parent)
        except Exception:
            if parent and isinstance(parent, Entity) and 'id' not in parent:
                raise SynapseMalformedEntityError("Couldn't find 'id' of parent. Has it been stored in Synapse?")
            else:
                raise SynapseMalformedEntityError("Couldn't find 'id' of parent.")

    # Note: this will work properly if derived classes declare their
    # internal state variable *before* invoking super(...).__init__(...)
    for key, value in kwargs.items():
        self.__setitem__(key, value)

    if 'concreteType' not in self:
        self['concreteType'] = self.__class__._synapse_entity_type
def __init__(self, **kwargs):
    # Verify that the parameters are correct
    if 'owner' not in kwargs:
        raise ValueError('Wiki constructor must have an owner specified')

    # Initialize the file handle list to be an empty list
    if 'attachmentFileHandleIds' not in kwargs:
        kwargs['attachmentFileHandleIds'] = []

    # Move the 'fileHandles' into the proper (wordier) bucket
    if 'fileHandles' in kwargs:
        for handle in kwargs['fileHandles']:
            kwargs['attachmentFileHandleIds'].append(handle)
        del kwargs['fileHandles']

    super(Wiki, self).__init__(kwargs)
    self.ownerId = id_of(self.owner)
    del self['owner']
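## Usage sketch (hypothetical ID): attach a wiki page to a project.
project = syn.get("syn1234567")
wiki = Wiki(owner=project, title="Challenge Home", markdown="Welcome to the challenge!")
wiki = syn.store(wiki)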
def __init__(self, columns=None, schema=None, **kwargs):
    if 'headers' not in kwargs:
        if columns:
            kwargs.setdefault('headers', []).extend([SelectColumn.from_column(column) for column in columns])
        elif schema and isinstance(schema, Schema):
            kwargs.setdefault('headers', []).extend([SelectColumn(id=id) for id in schema["columnIds"]])
    if ('tableId' not in kwargs) and schema:
        kwargs['tableId'] = utils.id_of(schema)
    if not kwargs.get('tableId', None):
        raise ValueError("Table schema ID must be defined to create a RowSet")
    if not kwargs.get('headers', None):
        raise ValueError("Column headers must be defined to create a RowSet")
    super(RowSet, self).__init__(kwargs)
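## Usage sketch: build a RowSet against a stored schema and store it via
## Table, mirroring the datetime test above (IDs and values hypothetical).
schema = syn.get("syn3333333")
rowset = RowSet(schema=schema, rows=[Row(['a', 1]), Row(['b', 2])])
syn.store(Table(schema, rowset))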
def usedEntity(self, target, targetVersion=None, wasExecuted=False):
    """
    TODO_Sphinx

    :param target:        either a synapse entity or entity id (as a string)
    :param targetVersion: optionally specify the version of the entity
    :param wasExecuted:   boolean indicating whether the entity represents code
                          that was executed to produce the result
    """
    reference = {'targetId': id_of(target)}
    if targetVersion:
        reference['targetVersionNumber'] = int(targetVersion)
    else:
        try:
            # if we have an Entity, get its version number
            reference['targetVersionNumber'] = target['versionNumber']
        except (KeyError, TypeError):
            # count on the platform to get the current version of the entity from Synapse
            pass
    self['used'].append({'reference': reference,
                         'wasExecuted': wasExecuted,
                         'concreteType': 'org.sagebionetworks.repo.model.provenance.UsedEntity'})
def validate(evaluation, send_messages=False, notifications=False, dry_run=False):
    """
    It may be convenient to validate submissions in one pass before scoring
    them, especially if scoring takes a long time.
    """
    print "\n\nValidating", utils.id_of(evaluation)
    print "-" * 60

    for submission, status in syn.getSubmissionBundles(evaluation, status='RECEIVED'):
        ## refetch the submission so that we get the file path
        ## to be later replaced by a "downloadFiles" flag on getSubmissionBundles
        submission = syn.getSubmission(submission)

        is_valid, validation_message = validate_submission(submission.filePath)
        print submission.id, validation_message
        if is_valid:
            status.status = "VALIDATED"
        else:
            status.status = "INVALID"

        if not dry_run:
            status = syn.store(status)

        ## send message AFTER storing status to ensure we don't get repeat messages
        if not is_valid and send_messages:
            profile = syn.getUserProfile(submission.userId)
            message = VALIDATION_TEMPLATE.format(
                username=profile.get('firstName', profile.get('userName', profile['ownerId'])),
                submission_id=submission.id,
                submission_name=submission.name,
                message=validation_message)
            response = syn.sendMessage(
                userIds=[submission.userId],
                messageSubject="Error validating Submission to " + CHALLENGE_NAME,
                messageBody=message)
            print "sent validation error message: ", unicode(response).encode('utf-8')
def archive(evaluation, destination=None, token=None, name=None, query=None):
    """
    Archive the submissions for the given evaluation queue and store them in the destination synapse folder.

    :param evaluation: a synapse evaluation queue or its ID
    :param destination: a synapse folder or its ID
    :param query: a query that will return the desired submissions. At least the ID must be returned.
                  Defaults to _select * from evaluation_[EVAL_ID] where status=="SCORED"_.
    """
    challenge = {'5877348': 'FusionDetection', '5952651': 'IsoformQuantification'}
    if not query:
        query = 'select * from evaluation_%s where status=="SCORED"' % utils.id_of(evaluation)
    path = challenge[utils.id_of(evaluation)]

    ## for each submission, download its associated file and write a line of metadata
    results = Query(query=query)
    if 'objectId' not in results.headers:
        raise ValueError("Can't find the required field \"objectId\" in the results of the query: \"{0}\"".format(query))

    for result in results:
        # Check if the folder has already been created in synapse
        # (This is used as a tool to check submissions that have already been cached)
        new_map = []
        mapping = syn.get("syn7348150")
        submissionId = result[results.headers.index('objectId')]
        check = syn.query('select id,name from folder where parentId == "%s" and name == "%s"' % (destination, submissionId))
        if check['totalNumberOfResults'] == 0:
            os.mkdir(submissionId)
            submission = syn.getSubmission(submissionId, downloadFile=False)
            if submission.entity.externalURL is None:
                submission = syn.getSubmission(submissionId, downloadLocation=submissionId)
                newFilePath = submission.filePath.replace(' ', '_')
                shutil.move(submission.filePath, newFilePath)
                # Store CWL file in bucket
                os.system('gsutil cp -R %s gs://smc-rna-eval/entries/%s' % (submissionId, path))
                with open(newFilePath, "r") as cwlfile:
                    docs = yaml.load(cwlfile)
                    merged = docs['$graph']
                    docker = []
                    for tools in merged:
                        if tools['class'] == 'CommandLineTool':
                            if tools.get('requirements', None) is not None:
                                for i in tools['requirements']:
                                    if i.get('dockerPull', None) is not None:
                                        docker.append(i['dockerPull'])
                            if tools.get('hints', None) is not None:
                                for i in tools['hints']:
                                    if i.get('dockerPull', None) is not None:
                                        docker.append(i['dockerPull'])
                        if tools['class'] == 'Workflow':
                            hints = tools.get("hints", None)
                            if hints is not None:
                                for i in tools['hints']:
                                    if os.path.basename(i['class']) == "synData":
                                        temp = syn.get(i['entity'])
                                        # create synid and index mapping
                                        new_map.append([temp.id, "gs://smc-rna-eval/entries/%s/%s/%s" % (path, submissionId, temp.name)])
                                        # Store index files
                                        os.system('gsutil cp %s gs://smc-rna-eval/entries/%s/%s' % (temp.path, path, submissionId))
                                        os.system('rm -rf ~/.synapseCache/*')
            else:
                if submission.entity.externalURL.endswith("/"):
                    submission.entity.externalURL = submission.entity.externalURL[:-1]
                taskId = submission.entity.externalURL.split("/")[-1]
                test = subprocess.check_call(["python",
                                              os.path.join(os.path.dirname(__file__), "../../SMC-RNA-Eval/sbg-download.py"),
                                              "--token", token, taskId, submissionId])
                os.system('gsutil cp -R %s gs://smc-rna-eval/entries/%s' % (submissionId, path))
                # Pull down docker containers
                with open("%s/submission.cwl" % submissionId, "r") as cwlfile:
                    docs = yaml.load(cwlfile)
                # merged = docs['steps']
                # docker = []
                # for tools in merged:
                #     for hint in tools['run']['hints']:
                #         if hint['class'] == 'DockerRequirement':
                #             docker.append(hint['dockerPull'])
                #     for require in tools['run']['requirements']:
                #         if require.get('requirements') is not None:
                #             for i in require.get('requirements'):
                #                 if i['class'] == 'DockerRequirement':
                #                     docker.append(i['dockerPull'])
                docker = []
                for tools in docs['hints']:
                    if tools['class'] == "DockerRequirement":
                        docker.append(tools['dockerPull'])
            os.system('rm -rf %s' % submissionId)
            if len(new_map) > 0:
                table = syn.store(Table(mapping, new_map))
            # Pull, save, and store docker containers
            docker = set(docker)
            for i in docker:
                fileName = os.path.basename(i).replace(":", "_")
                os.system('sudo -i docker pull %s' % i)
                # os.system('sudo -i docker save %s' % i)
                os.system('sudo docker save -o %s.tar %s' % (fileName, i))
                os.system('sudo chmod a+r %s.tar' % fileName)
                os.system('gsutil cp %s.tar gs://smc-rna-eval/entries/%s/%s' % (fileName, path, submissionId))
                os.remove("%s.tar" % fileName)
            submission_parent = syn.store(Folder(submissionId, parent=destination))
def archive(evaluation, destination=None, name=None, query=None):
    """
    Archive the submissions for the given evaluation queue and store them in the destination synapse folder.

    :param evaluation: a synapse evaluation queue or its ID
    :param destination: a synapse folder or its ID
    :param query: a query that will return the desired submissions. At least the ID must be returned.
                  Defaults to _select * from evaluation_[EVAL_ID] where status=="VALIDATED"_.
    """
    challenge = {'5877348': 'FusionDetection', '5952651': 'IsoformQuantification'}
    if not query:
        query = 'select * from evaluation_%s where status=="VALIDATED"' % utils.id_of(evaluation)
    path = challenge[utils.id_of(evaluation)]

    ## for each submission, download its associated file and write a line of metadata
    results = Query(query=query)
    if 'objectId' not in results.headers:
        raise ValueError("Can't find the required field \"objectId\" in the results of the query: \"{0}\"".format(query))

    for result in results:
        # Check if the folder has already been created in synapse
        # (This is used as a tool to check submissions that have already been cached)
        submissionId = result[results.headers.index('objectId')]
        check = syn.query('select id,name from folder where parentId == "%s" and name == "%s"' % (destination, submissionId))
        if check['totalNumberOfResults'] == 0:
            os.mkdir(submissionId)
            submission_parent = syn.store(Folder(submissionId, parent=destination))
            submission = syn.getSubmission(submissionId, downloadLocation=submissionId)
            newFilePath = submission.filePath.replace(' ', '_')
            shutil.move(submission.filePath, newFilePath)
            # Store CWL file in bucket
            os.system('gsutil cp -R %s gs://smc-rna-cache/%s' % (submissionId, path))
            with open(newFilePath, "r") as cwlfile:
                docs = yaml.load(cwlfile)
                merged = docs['$graph']
                docker = []
                for tools in merged:
                    if tools['class'] == 'CommandLineTool':
                        if tools.get('requirements', None) is not None:
                            for i in tools['requirements']:
                                if i.get('dockerPull', None) is not None:
                                    docker.append(i['dockerPull'])
                    if tools['class'] == 'Workflow':
                        hints = tools.get("hints", None)
                        if hints is not None:
                            for i in tools['hints']:
                                if os.path.basename(i['class']) == "synData":
                                    temp = syn.get(i['entity'])
                                    # Store index files
                                    os.system('gsutil cp %s gs://smc-rna-cache/%s/%s' % (temp.path, path, submissionId))
                                    os.system('rm -rf ~/.synapseCache/*')
            # Pull, save, and store docker containers
            docker = set(docker)
            for i in docker:
                os.system('sudo docker pull %s' % i)
                os.system('sudo docker save -o %s.tar %s' % (os.path.basename(i), i))
                os.system('sudo chmod a+r %s.tar' % os.path.basename(i))
                os.system('gsutil cp %s.tar gs://smc-rna-cache/%s/%s' % (os.path.basename(i), path, submissionId))
                os.remove("%s.tar" % os.path.basename(i))
            os.system('rm -rf %s' % submissionId)
def syncFromSynapse(syn, entity, path=None, ifcollision='overwrite.local', allFiles=None, followLink=False):
    """Synchronizes all the files in a folder (including subfolders) from Synapse and adds a readme manifest with file metadata.

    :param syn:         A synapse object as obtained with syn = synapseclient.login()
    :param entity:      A Synapse ID, a Synapse Entity object of type file, folder or project.
    :param path:        An optional path where the file hierarchy will be reproduced.
                        If not specified the files will by default be placed in the synapseCache.
    :param ifcollision: Determines how to handle file collisions.
                        May be "overwrite.local", "keep.local", or "keep.both".
                        Defaults to "overwrite.local".
    :param followLink:  Determines whether the link returns the target Entity. Defaults to False.

    :returns: list of entities (files, tables, links)

    This function will crawl all subfolders of the project/folder specified by `entity` and download
    all files that have not already been downloaded. If there are newer files in Synapse (or a local
    file has been edited outside of the cache) since the last download then the local file will be
    replaced by the new file unless "ifcollision" is changed.

    If the files are being downloaded to a specific location outside of the Synapse cache a file
    (SYNAPSE_METADATA_MANIFEST.tsv) will also be added in the path that contains the metadata
    (annotations, storage location and provenance of all downloaded files).

    See also:
    - :py:func:`synapseutils.sync.syncToSynapse`

    Example:
    Download and print the paths of all downloaded files::

        entities = syncFromSynapse(syn, "syn1234")
        for f in entities:
            print(f.path)
    """
    # initialize the result list
    if allFiles is None:
        allFiles = list()

    # perform validation check on user input
    if is_synapse_id(entity):
        entity = syn.get(entity, downloadLocation=path, ifcollision=ifcollision, followLink=followLink)
    if isinstance(entity, File):
        allFiles.append(entity)
        return allFiles

    entity_id = id_of(entity)
    if not is_container(entity):
        raise ValueError("The provided id: %s is neither a container nor a File" % entity_id)

    # get the immediate children as iterator
    children = syn.getChildren(entity_id)

    # process each child
    for child in children:
        if is_container(child):
            # If we are downloading outside cache create directory
            if path is not None:
                new_path = os.path.join(path, child['name'])
                try:
                    os.makedirs(new_path)
                except OSError as err:
                    if err.errno != errno.EEXIST:
                        raise
            else:
                new_path = None
            # recursively explore this container's children
            syncFromSynapse(syn, child['id'], new_path, ifcollision, allFiles, followLink=followLink)
        else:
            # getting the child
            ent = syn.get(child['id'], downloadLocation=path, ifcollision=ifcollision, followLink=followLink)
            if isinstance(ent, File):
                allFiles.append(ent)

    if path is not None:  # If path is None files are stored in cache.
        filename = os.path.join(path, MANIFEST_FILENAME)
        filename = os.path.expanduser(os.path.normcase(filename))
        generateManifest(syn, allFiles, filename)

    return allFiles
def syncFromSynapse(syn, entity, path=None, ifcollision='overwrite.local', allFiles=None, followLink=False):
    """Synchronizes all the files in a folder (including subfolders) from Synapse and adds a readme manifest with file metadata.

    :param syn:         A synapse object as obtained with syn = synapseclient.login()
    :param entity:      A Synapse ID, a Synapse Entity object of type folder or project.
    :param path:        An optional path where the file hierarchy will be reproduced.
                        If not specified the files will by default be placed in the synapseCache.
    :param ifcollision: Determines how to handle file collisions.
                        May be "overwrite.local", "keep.local", or "keep.both".
                        Defaults to "overwrite.local".
    :param followLink:  Determines whether the link returns the target Entity. Defaults to False.

    :returns: list of entities (files, tables, links)

    This function will crawl all subfolders of the project/folder specified by `entity` and download
    all files that have not already been downloaded. If there are newer files in Synapse (or a local
    file has been edited outside of the cache) since the last download then the local file will be
    replaced by the new file unless ifcollision is changed.

    If the files are being downloaded to a specific location outside of the Synapse cache, a file
    (SYNAPSE_METADATA_MANIFEST.tsv) will also be added in the path that contains the metadata
    (annotations, storage location and provenance of all downloaded files).

    See also:
    - :py:func:`synapseutils.sync.syncToSynapse`

    Example::

        Download and print the paths of all downloaded files::

            entities = syncFromSynapse(syn, "syn1234")
            for f in entities:
                print(f.path)
    """
    if allFiles is None:
        allFiles = list()
    id = id_of(entity)
    results = syn.chunkedQuery("select id, name, nodeType from entity where entity.parentId=='%s'" % id)
    for result in results:
        if is_container(result):
            if path is not None:  # If we are downloading outside cache create directory.
                new_path = os.path.join(path, result['entity.name'])
                try:
                    os.mkdir(new_path)
                except OSError as err:
                    if err.errno != errno.EEXIST:
                        raise
                print('making dir', new_path)
            else:
                new_path = None
            syncFromSynapse(syn, result['entity.id'], new_path, ifcollision, allFiles)
        else:
            ent = syn.get(result['entity.id'], downloadLocation=path, ifcollision=ifcollision, followLink=followLink)
            allFiles.append(ent)

    if path is not None:  # If path is None files are stored in cache.
        filename = os.path.join(path, MANIFEST_FILENAME)
        filename = os.path.expanduser(os.path.normcase(filename))
        generateManifest(syn, allFiles, filename)

    return allFiles
def create_supertable_leaderboard(evaluation):
    """
    Create the leaderboard using a supertable, a markdown extension that dynamically
    builds a table by querying submissions. Because the supertable re-queries whenever
    the page is rendered, this step only has to be done once.
    """
    uri_base = urllib.quote_plus("/evaluation/submission/query")
    # it's incredibly picky that the equals sign here has to be urlencoded, but
    # the later equals signs CAN'T be urlencoded.
    query = urllib.quote_plus('query=select * from evaluation_%s where status=="SCORED"' % utils.id_of(evaluation))
    params = [('paging', 'true'),
              ('queryTableResults', 'true'),
              ('showIfLoggedInOnly', 'false'),
              ('pageSize', '25'),
              ('showRowNumber', 'false'),
              ('jsonResultsKeyName', 'rows')]

    # Column specifications have 4 fields: renderer, display name, column name, sort.
    # Renderer and sort are usually 'none' and 'NONE'.
    for i, column in enumerate(LEADERBOARD_COLUMNS):
        fields = dict(renderer='none', sort='NONE')
        fields.update(column)
        params.append(('columnConfig%s' % i, "{renderer},{display_name},{column_name};,{sort}".format(**fields)))

    return "${supertable?path=" + uri_base + "%3F" + query + "&" + \
           "&".join([key + "=" + urllib.quote_plus(value) for key, value in params]) + "}"
def score(evaluation, send_messages=False, notifications=False, dry_run=False):
    sys.stdout.write('\n\nScoring ' + utils.id_of(evaluation))
    sys.stdout.flush()

    ## collect statuses here for batch update
    statuses = []

    for submission, status in syn.getSubmissionBundles(evaluation, status='VALIDATED'):
        ## refetch the submission so that we get the file path
        ## to be later replaced by a "downloadFiles" flag on getSubmissionBundles
        submission = syn.getSubmission(submission)

        try:
            score, message = score_submission(submission, submission.filePath)
            status.status = "SCORED"
            status.score = math.fsum(v for k, v in score.iteritems()) / len(score)
            status.annotations = synapseclient.annotations.to_submission_status_annotations(score)
        except Exception as ex1:
            sys.stderr.write('\n\nError scoring submission %s %s:\n' % (submission.name, submission.id))
            st = StringIO()
            traceback.print_exc(file=st)
            sys.stderr.write(st.getvalue())
            sys.stderr.write('\n')
            status.status = "INVALID"
            message = st.getvalue()

            if notifications and ADMIN_USER_IDS:
                submission_info = "submission id: %s\nsubmission name: %s\nsubmitted by user id: %s\n\n" % (
                    submission.id, submission.name, submission.userId)
                response = syn.sendMessage(
                    userIds=ADMIN_USER_IDS,
                    messageSubject=CHALLENGE_NAME + ": exception during scoring",
                    messageBody=error_notification_template.format(message=submission_info + st.getvalue()))
                print "sent notification: ", unicode(response).encode('utf-8')

        if not dry_run:
            status = syn.store(status)

        ## send message AFTER storing status to ensure we don't get repeat messages
        if send_messages:
            profile = syn.getUserProfile(submission.userId)
            if status.status == 'SCORED':
                message_body = scoring_message_template.format(
                    message=message,
                    username=profile.get('firstName', profile.get('userName', profile['ownerId'])),
                    submission_name=submission.name,
                    submission_id=submission.id)
                subject = "Submission to " + CHALLENGE_NAME
            else:
                message_body = scoring_error_message_template.format(
                    message=message,
                    username=profile.get('firstName', profile.get('userName', profile['ownerId'])),
                    submission_name=submission.name,
                    submission_id=submission.id)
                subject = "Error scoring submission to " + CHALLENGE_NAME

            response = syn.sendMessage(
                userIds=[submission.userId],
                messageSubject=subject,
                messageBody=message_body)
            print "sent message: ", unicode(response).encode('utf-8')

        sys.stdout.write('.')
        sys.stdout.flush()

    sys.stdout.write('\n')
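## Usage sketch (hypothetical queue ID): the typical harness order is validate
## first, then score, assuming the module-level globals used above (syn,
## message templates, CHALLENGE_NAME, ADMIN_USER_IDS) are configured.
evaluation = syn.getEvaluation('9614112')
validate(evaluation, send_messages=True)
score(evaluation, send_messages=True, notifications=True)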
def syncFromSynapse(syn, entity, path=None, ifcollision='overwrite.local', allFiles=None, followLink=False):
    """Synchronizes all the files in a folder (including subfolders) from Synapse and adds a readme manifest with file metadata.

    :param syn:         A synapse object as obtained with syn = synapseclient.login()
    :param entity:      A Synapse ID, a Synapse Entity object of type folder or project.
    :param path:        An optional path where the file hierarchy will be reproduced.
                        If not specified the files will by default be placed in the synapseCache.
    :param ifcollision: Determines how to handle file collisions.
                        May be "overwrite.local", "keep.local", or "keep.both".
                        Defaults to "overwrite.local".
    :param followLink:  Determines whether the link returns the target Entity. Defaults to False.

    :returns: list of entities (files, tables, links)

    This function will crawl all subfolders of the project/folder specified by `entity` and download
    all files that have not already been downloaded. If there are newer files in Synapse (or a local
    file has been edited outside of the cache) since the last download then the local file will be
    replaced by the new file unless "ifcollision" is changed.

    If the files are being downloaded to a specific location outside of the Synapse cache a file
    (SYNAPSE_METADATA_MANIFEST.tsv) will also be added in the path that contains the metadata
    (annotations, storage location and provenance of all downloaded files).

    See also:
    - :py:func:`synapseutils.sync.syncToSynapse`

    Example:
    Download and print the paths of all downloaded files::

        entities = syncFromSynapse(syn, "syn1234")
        for f in entities:
            print(f.path)
    """
    if allFiles is None:
        allFiles = list()
    id = id_of(entity)
    results = syn.getChildren(id)
    zero_results = True
    for result in results:
        zero_results = False
        if is_container(result):
            if path is not None:  # If we are downloading outside cache create directory.
                new_path = os.path.join(path, result['name'])
                try:
                    os.makedirs(new_path)
                except OSError as err:
                    if err.errno != errno.EEXIST:
                        raise
                print('making dir', new_path)
            else:
                new_path = None
            syncFromSynapse(syn, result['id'], new_path, ifcollision, allFiles)
        else:
            ent = syn.get(result['id'], downloadLocation=path, ifcollision=ifcollision, followLink=followLink)
            if isinstance(ent, File):
                allFiles.append(ent)

    if zero_results:
        # an http error would be raised if the synapse Id was not valid (404) or we had no
        # permission (403), so at this point the entity should be get-able
        stderr.write("The synapse id %s is not a container (Project/Folder), attempting to get the entity anyway" % id)
        ent = syn.get(id, downloadLocation=path, ifcollision=ifcollision, followLink=followLink)
        if isinstance(ent, File):
            allFiles.append(ent)
        else:
            raise ValueError("The provided id: %s was neither a container nor a File" % id)

    if path is not None:  # If path is None files are stored in cache.
        filename = os.path.join(path, MANIFEST_FILENAME)
        filename = os.path.expanduser(os.path.normcase(filename))
        generateManifest(syn, allFiles, filename)

    return allFiles
def create_challenge_object(project, participants_team):
    challenge_json = {'participantTeamId': utils.id_of(participants_team),
                      'projectId': utils.id_of(project)}
    return DictObject(**syn.restPOST("/challenge", body=json.dumps(challenge_json)))
def archive(evaluation, destination=None, token=None, name=None, query=None):
    """
    Archive the submissions for the given evaluation queue and store them in the destination synapse folder.

    :param evaluation: a synapse evaluation queue or its ID
    :param destination: a synapse folder or its ID
    :param query: a query that will return the desired submissions. At least the ID must be returned.
                  Defaults to _select * from evaluation_[EVAL_ID] where status=="SCORED"_.
    """
    challenge = {'5877348': 'FusionDetection', '5952651': 'IsoformQuantification'}
    if not query:
        query = 'select * from evaluation_%s where status=="SCORED"' % utils.id_of(evaluation)
    path = challenge[utils.id_of(evaluation)]

    ## for each submission, download its associated file and write a line of metadata
    results = Query(query=query)
    if 'objectId' not in results.headers:
        raise ValueError("Can't find the required field \"objectId\" in the results of the query: \"{0}\"".format(query))

    for result in results:
        # Check if the folder has already been created in synapse
        # (This is used as a tool to check submissions that have already been cached)
        new_map = []
        mapping = syn.get("syn7348150")
        submissionId = result[results.headers.index('objectId')]
        check = syn.query('select id,name from folder where parentId == "%s" and name == "%s"' % (destination, submissionId))
        if check['totalNumberOfResults'] == 0:
            os.mkdir(submissionId)
            submission = syn.getSubmission(submissionId, downloadLocation=submissionId)
            if submission.entity.externalURL is None:
                newFilePath = submission.filePath.replace(' ', '_')
                shutil.move(submission.filePath, newFilePath)
                # Store CWL file in bucket
                os.system('gsutil cp -R %s gs://smc-rna-cache/%s' % (submissionId, path))
                with open(newFilePath, "r") as cwlfile:
                    docs = yaml.load(cwlfile)
                    merged = docs['$graph']
                    docker = []
                    for tools in merged:
                        if tools['class'] == 'CommandLineTool':
                            if tools.get('requirements', None) is not None:
                                for i in tools['requirements']:
                                    if i.get('dockerPull', None) is not None:
                                        docker.append(i['dockerPull'])
                            if tools.get('hints', None) is not None:
                                for i in tools['hints']:
                                    if i.get('dockerPull', None) is not None:
                                        docker.append(i['dockerPull'])
                        if tools['class'] == 'Workflow':
                            hints = tools.get("hints", None)
                            if hints is not None:
                                for i in tools['hints']:
                                    if os.path.basename(i['class']) == "synData":
                                        temp = syn.get(i['entity'])
                                        # create synid and index mapping
                                        new_map.append([temp.id, "gs://smc-rna-cache/%s/%s/%s" % (path, submissionId, temp.name)])
                                        # Store index files
                                        os.system('gsutil cp %s gs://smc-rna-cache/%s/%s' % (temp.path, path, submissionId))
                                        os.system('rm -rf ~/.synapseCache/*')
            else:
                os.system('rm %s' % os.path.join(submissionId, submission.name))
                test = subprocess.check_call(["python",
                                              os.path.join(os.path.dirname(__file__), "../../SMC-RNA-Eval/sbg-download.py"),
                                              "--token", token, submission.name, submissionId])
                os.system('gsutil cp -R %s gs://smc-rna-cache/%s' % (submissionId, path))
                # Pull down docker containers
                with open("%s/submission.cwl" % submissionId, "r") as cwlfile:
                    docs = yaml.load(cwlfile)
                merged = docs['steps']
                docker = []
                for tools in merged:
                    for hint in tools['run']['hints']:
                        if hint['class'] == 'DockerRequirement':
                            docker.append(hint['dockerPull'])
                    for require in tools['run']['requirements']:
                        if require.get('requirements') is not None:
                            for i in require.get('requirements'):
                                if i['class'] == 'DockerRequirement':
                                    docker.append(i['dockerPull'])
            os.system('rm -rf %s' % submissionId)
            if len(new_map) > 0:
                table = syn.store(Table(mapping, new_map))
            # Pull, save, and store docker containers
            docker = set(docker)
            for i in docker:
                fileName = os.path.basename(i).replace(":", "_")
                os.system('sudo -i docker pull %s' % i)
                # os.system('sudo -i docker save %s' % i)
                os.system('sudo docker save -o %s.tar %s' % (fileName, i))
                os.system('sudo chmod a+r %s.tar' % fileName)
                os.system('gsutil cp %s.tar gs://smc-rna-cache/%s/%s' % (fileName, path, submissionId))
                os.remove("%s.tar" % fileName)
            submission_parent = syn.store(Folder(submissionId, parent=destination))