def add_new_rows_to_table(df, replace_table=False, dry_run=False):
    """Add rows for synapse IDs not already represented in the table or replace the whole table"""
    schema = syn.get(TABLE_SYNAPSE_ID)
    if replace_table:
        ## delete previous entries in pilot-63-progress table
        results = syn.tableQuery('select * from %s' % utils.id_of(schema),
                                 resultsAs='rowset')
        if not dry_run:
            syn.delete(results)
    else:
        results = syn.tableQuery('select synapse_id from %s' %
                                 utils.id_of(schema),
                                 includeRowIdAndRowVersion=False)
        synapse_ids = [row[0] for row in results]
        df = df[[
            synapse_id not in synapse_ids for synapse_id in df['synapse_id']
        ]]

    if df.shape[0] > 0:
        if dry_run:
            print "Dry run: would have added %d rows to pilot-63-progress table" % df.shape[
                0]
        else:
            print "Adding %d rows to pilot-63-progress table" % df.shape[0]
            syn.store(Table(schema, df))
        return df.shape[0]
    else:
        print "No new rows for pilot-63-progress table"
        return None
def create_challenge_object(project, participants_team):
    """Create a Challenge in Synapse linking a project to its participant team.

    :param project: a synapse Project or its ID
    :param participants_team: a synapse Team or its ID
    :returns: a DictObject wrapping the challenge returned by the REST API
    """
    payload = json.dumps({
        'participantTeamId': utils.id_of(participants_team),
        'projectId': utils.id_of(project)
    })
    response = syn.restPOST("/challenge", body=payload)
    return DictObject(**response)
def archive(evaluation, destination=None, name=None, query=None):
    """
    Archive the submissions for the given evaluation queue and store them in the destination synapse folder.

    For each VALIDATED submission: downloads the submitted CWL file, copies it
    (plus referenced index files and the docker images it pulls) into the
    ``gs://smc-rna-cache`` bucket, and creates a Folder named after the
    submission ID under ``destination`` so already-archived submissions are
    skipped on subsequent runs.

    :param evaluation: a synapse evaluation queue or its ID
    :param destination: a synapse folder or its ID
    :param query: a query that will return the desired submissions. At least the ID must be returned.
                  defaults to _select * from evaluation_[EVAL_ID] where status=="VALIDATED"_.
    """
    # map of evaluation queue ID -> bucket sub-path for the two SMC-RNA sub-challenges;
    # any other evaluation ID raises KeyError below
    challenge = {'5877348':'FusionDetection','5952651':'IsoformQuantification'}
    if not query:
        query = 'select * from evaluation_%s where status=="VALIDATED"' % utils.id_of(evaluation)
    path = challenge[utils.id_of(evaluation)]
    ## for each submission, download it's associated file and write a line of metadata
    results = Query(query=query)
    if 'objectId' not in results.headers:
        raise ValueError("Can't find the required field \"objectId\" in the results of the query: \"{0}\"".format(query))
    for result in results:
        #Check if the folder has already been created in synapse
        #(This is used as a tool to check submissions that have already been cached)
        submissionId = result[results.headers.index('objectId')]
        check = syn.query('select id,name from folder where parentId == "%s" and name == "%s"' % (destination,submissionId))
        if check['totalNumberOfResults']==0:
            os.mkdir(submissionId)
            submission_parent = syn.store(Folder(submissionId,parent=destination))
            submission = syn.getSubmission(submissionId, downloadLocation=submissionId)
            # spaces in the downloaded filename would break the shell commands below
            newFilePath = submission.filePath.replace(' ', '_')
            shutil.move(submission.filePath,newFilePath)
            #Store CWL file in bucket
            # NOTE(review): shell commands throughout are built by string
            # interpolation from Synapse-provided values — confirm these are trusted
            os.system('gsutil cp -R %s gs://smc-rna-cache/%s' % (submissionId,path))
            with open(newFilePath,"r") as cwlfile:
                # NOTE(review): yaml.load without an explicit Loader is unsafe
                # on untrusted input — consider yaml.safe_load
                docs = yaml.load(cwlfile)
                merged = docs['$graph']
                docker = []
                # collect every dockerPull image referenced by the workflow's tools
                for tools in merged:
                    if tools['class'] == 'CommandLineTool':
                        if tools.get('requirements',None) is not None:
                            for i in tools['requirements']:
                                if i.get('dockerPull',None) is not None:
                                    docker.append(i['dockerPull'])
                    if tools['class'] == 'Workflow':
                        hints = tools.get("hints",None)
                        if hints is not None:
                            for i in tools['hints']:
                                # synData hints point at Synapse entities (index files)
                                if os.path.basename(i['class']) == "synData":
                                    temp = syn.get(i['entity'])
                                    #Store index files
                                    os.system('gsutil cp %s gs://smc-rna-cache/%s/%s' % (temp.path,path,submissionId))
            # free local disk: the index files above were pulled into the synapse cache
            os.system('rm -rf ~/.synapseCache/*')
            #Pull, save, and store docker containers
            docker = set(docker)  # de-duplicate images shared between tools
            for i in docker:
                os.system('sudo docker pull %s' % i)
                os.system('sudo docker save %s' % i)
                os.system('sudo docker save -o %s.tar %s' %(os.path.basename(i),i))
                os.system('sudo chmod a+r %s.tar' % os.path.basename(i))
                os.system('gsutil cp %s.tar gs://smc-rna-cache/%s/%s' % (os.path.basename(i),path,submissionId))
                os.remove("%s.tar" % os.path.basename(i))
            os.system('rm -rf %s' % submissionId)
def archive(evaluation, destination=None, name=None, query=None):
    """
    Archive the submissions for the given evaluation queue and store them in the destination synapse folder.

    :param evaluation: a synapse evaluation queue or its ID
    :param destination: a synapse folder or its ID
    :param query: a query that will return the desired submissions. At least the ID must be returned.
                  defaults to _select * from evaluation_[EVAL_ID] where status=="SCORED"_.
    """
    tempdir = tempfile.mkdtemp()
    archive_dirname = 'submissions_%s' % utils.id_of(evaluation)

    if not query:
        query = 'select * from evaluation_%s where status=="SCORED"' % utils.id_of(
            evaluation)

    ## for each submission, download it's associated file and write a line of metadata
    results = Query(query=query)
    if not name:
        name = 'submissions_%s.tgz' % utils.id_of(evaluation)
    tar_path = os.path.join(tempdir, name)
    metadata_file_path = os.path.join(tempdir, 'submission_metadata.csv')
    print "creating tar at:", tar_path

    ## for each submission, we add a file to the tar and a row
    ## to the metadata .csv file
    with tarfile.open(tar_path, mode='w:gz') as archive:
        with open(metadata_file_path, 'w') as f:

            ## write header row to .csv file
            header = ','.join(results.headers)
            print header
            f.write(header + '\n')

            ## add submissions to archive and write rows to .csv file
            for result in results:
                ## retrieve file into cache and copy it to destination
                submission = syn.getSubmission(
                    result[results.headers.index('objectId')])
                archive.add(submission.filePath,
                            arcname=os.path.join(
                                archive_dirname, submission.id + "_" +
                                os.path.basename(submission.filePath)))
                line = (','.join(unicode(item)
                                 for item in result)).encode('utf-8')
                print line
                f.write(line + '\n')

        ## add metadata .csv file to the tar
        archive.add(name=metadata_file_path,
                    arcname=os.path.join(archive_dirname,
                                         'submission_metadata.csv'))

    entity = syn.store(File(tar_path, parent=destination),
                       evaluation_id=utils.id_of(evaluation))
    print "created:", entity.id, entity.name
    return entity.id
# Example #5 (score: 0)
def archive(evaluation, destination=None, name=None, query=None):
    """
    Archive the submissions for the given evaluation queue and store them in the destination synapse folder.

    Builds a gzipped tar of all matching submission files plus a
    submission_metadata.csv (which additionally records each file's
    prefixed name inside the archive), then stores the tar in Synapse.

    :param evaluation: a synapse evaluation queue or its ID
    :param destination: a synapse folder or its ID
    :param name: name for the archive file; defaults to submissions_[EVAL_ID].tgz
    :param query: a query that will return the desired submissions. At least the ID must be returned.
                  defaults to _select * from evaluation_[EVAL_ID] where status=="SCORED"_.
    :returns: the Synapse ID of the stored archive entity
    """
    tempdir = tempfile.mkdtemp()
    archive_dirname = 'submissions_%s' % utils.id_of(evaluation)

    if not query:
        query = 'select * from evaluation_%s where status=="SCORED"' % utils.id_of(
            evaluation)

    ## for each submission, download it's associated file and write a line of metadata
    results = Query(query=query)
    # fail fast if the query doesn't return the objectId column used below
    if 'objectId' not in results.headers:
        raise ValueError(
            "Can't find the required field \"objectId\" in the results of the query: \"{0}\""
            .format(query))
    if not name:
        name = 'submissions_%s.tgz' % utils.id_of(evaluation)
    tar_path = os.path.join(tempdir, name)
    print "creating tar at:", tar_path
    print results.headers
    with tarfile.open(tar_path, mode='w:gz') as archive:
        with open(os.path.join(tempdir, 'submission_metadata.csv'), 'w') as f:
            # header row: all query columns plus the archived filename
            f.write(
                (','.join(hdr for hdr in (results.headers + ['filename'])) +
                 '\n').encode('utf-8'))
            for result in results:
                ## retrieve file into cache and copy it to destination
                submission = syn.getSubmission(
                    result[results.headers.index('objectId')])
                # prefix with the submission ID so files with equal names don't collide
                prefixed_filename = submission.id + "_" + os.path.basename(
                    submission.filePath)
                archive.add(submission.filePath,
                            arcname=os.path.join(archive_dirname,
                                                 prefixed_filename))
                line = (','.join(
                    unicode(item)
                    for item in (result +
                                 [prefixed_filename]))).encode('utf-8')
                print line
                f.write(line + '\n')
        # the metadata .csv goes into the tar alongside the submission files
        archive.add(name=os.path.join(tempdir, 'submission_metadata.csv'),
                    arcname=os.path.join(archive_dirname,
                                         'submission_metadata.csv'))

    entity = syn.store(File(tar_path, parent=destination),
                       evaluation_id=utils.id_of(evaluation))
    print "created:", entity.id, entity.name
    return entity.id
def tableUpdateWhere(tableSchema, whereClause, setDict):
    """The UPDATE statement is used to update existing rows in a table.

    Queries the rows matching ``whereClause``, overwrites the columns named
    in ``setDict`` with the given values, and stores the modified rows back.

    :param tableSchema: a table Schema or its Synapse ID
    :param whereClause: SQL where-clause text (without the leading 'where')
    :param setDict: mapping of column name -> new value to assign
    :returns: the stored Table
    """
    from synapseclient.table import Table
    from synapseclient.utils import id_of
    table_id = id_of(tableSchema)
    query = 'select %s from %s where %s' % (','.join(setDict.keys()), table_id, whereClause)
    df = syn.tableQuery(query).asDataFrame()
    for key, value in setDict.items():
        df[key] = value
    ## store the updated DataFrame directly; the original passed the filename
    ## 'skit.csv', which was never written (the to_csv call was commented
    ## out), so the store read a stale or nonexistent file
    return syn.store(Table(table_id, df))
def archive(evaluation, destination=None, name=None, query=None):
    """
    Archive the submissions for the given evaluation queue and store them in the destination synapse folder.

    :param evaluation: a synapse evaluation queue or its ID
    :param destination: a synapse folder or its ID
    :param query: a query that will return the desired submissions. At least the ID must be returned.
                  defaults to _select * from evaluation_[EVAL_ID] where status=="SCORED"_.
    """
    tempdir = tempfile.mkdtemp()
    archive_dirname = 'submissions_%s' % utils.id_of(evaluation)

    if not query:
        query = 'select * from evaluation_%s where status=="SCORED"' % utils.id_of(evaluation)

    ## for each submission, download it's associated file and write a line of metadata
    results = Query(query=query)
    if not name:
        name = 'submissions_%s.tgz' % utils.id_of(evaluation)
    tar_path = os.path.join(tempdir, name)
    metadata_file_path = os.path.join(tempdir, 'submission_metadata.csv')
    print "creating tar at:", tar_path

    ## for each submission, we add a file to the tar and a row
    ## to the metadata .csv file
    with tarfile.open(tar_path, mode='w:gz') as archive:
        with open(metadata_file_path, 'w') as f:

            ## write header row to .csv file
            header = ','.join(results.headers)
            print header
            f.write(header + '\n')

            ## add submissions to archive and write rows to .csv file 
            for result in results:
                ## retrieve file into cache and copy it to destination
                submission = syn.getSubmission(result[results.headers.index('objectId')])
                archive.add(submission.filePath, arcname=os.path.join(archive_dirname, submission.id + "_" + os.path.basename(submission.filePath)))
                line = (','.join(unicode(item) for item in result)).encode('utf-8')
                print line
                f.write(line + '\n')

        ## add metadata .csv file to the tar
        archive.add(
            name=metadata_file_path,
            arcname=os.path.join(archive_dirname, 'submission_metadata.csv'))

    entity = syn.store(File(tar_path, parent=destination), evaluation_id=utils.id_of(evaluation))
    print "created:", entity.id, entity.name
    return entity.id
def test_id_of():
    """Exercise utils.id_of over scalars, dicts, and property-bearing objects."""
    ## scalars: ints are stringified, synapse IDs pass through
    assert utils.id_of(1) == "1"
    assert utils.id_of("syn12345") == "syn12345"

    ## dicts: an 'id' key wins, whether top-level or under 'properties'
    assert utils.id_of({"foo": 1, "id": 123}) == 123
    assert_raises(ValueError, utils.id_of, {"foo": 1, "idzz": 123})
    assert utils.id_of({"properties": {"id": 123}}) == 123
    assert_raises(ValueError, utils.id_of, {"properties": {"qq": 123}})
    assert_raises(ValueError, utils.id_of, object())

    ## objects exposing a 'properties' dict with an id also work
    class Foo:
        def __init__(self, id):
            self.properties = {"id": id}

    assert utils.id_of(Foo(123)) == 123
def test_id_of():
    """id_of should resolve scalars, dicts, and objects, raising ValueError otherwise."""
    # plain values
    assert utils.id_of(1) == '1'
    assert utils.id_of('syn12345') == 'syn12345'

    # dictionaries with and without a usable id
    assert utils.id_of({'foo': 1, 'id': 123}) == 123
    assert_raises(ValueError, utils.id_of, {'foo': 1, 'idzz': 123})
    assert utils.id_of({'properties': {'id': 123}}) == 123
    assert_raises(ValueError, utils.id_of, {'properties': {'qq': 123}})
    assert_raises(ValueError, utils.id_of, object())

    # object carrying a properties mapping
    class Foo:
        def __init__(self, id):
            self.properties = {'id': id}

    foo_instance = Foo(123)
    assert utils.id_of(foo_instance) == 123
def test_id_of():
    """Check id_of accepts ints, strings, dicts, and property objects."""
    assert utils.id_of(1) == '1'
    assert utils.id_of('syn12345') == 'syn12345'
    assert utils.id_of({'foo': 1, 'id': 123}) == 123
    # a dict with no recognizable id key must be rejected
    assert_raises(ValueError, utils.id_of, {'foo': 1, 'idzz': 123})
    assert utils.id_of({'properties': {'id': 123}}) == 123
    assert_raises(ValueError, utils.id_of, {'properties': {'qq': 123}})
    # arbitrary objects without properties are rejected too
    assert_raises(ValueError, utils.id_of, object())

    class Foo:
        def __init__(self, id):
            self.properties = {'id': id}

    assert utils.id_of(Foo(123)) == 123
# Example #11 (score: 0)
 def __init__(self,
              name=None,
              columns=None,
              parent=None,
              properties=None,
              annotations=None,
              local_state=None,
              **kwargs):
     ## Build a table Schema. ``columns`` may mix already-stored columns
     ## (given as ID strings/ints or objects with an ``id``) with unstored
     ## Column objects, which are queued for creation at store time.
     self.properties.setdefault('columnIds', [])
     if name: kwargs['name'] = name
     if columns:
         for column in columns:
             ## stored columns are referenced by ID ...
             if isinstance(column, basestring) or isinstance(
                     column, int) or hasattr(column, 'id'):
                 kwargs.setdefault('columnIds',
                                   []).append(utils.id_of(column))
             ## ... unstored Column objects are deferred to columns_to_store
             elif isinstance(column, Column):
                 kwargs.setdefault('columns_to_store', []).append(column)
             else:
                 raise ValueError("Not a column? %s" % unicode(column))
     super(Schema, self).__init__(concreteType=Schema._synapse_entity_type,
                                  properties=properties,
                                  annotations=annotations,
                                  local_state=local_state,
                                  parent=parent,
                                  **kwargs)
# Example #12 (score: 0)
    def usedEntity(self, target, targetVersion=None, wasExecuted=False):
        """
        Record that ``target`` was used to produce this provenance result.

        :param target:        either a synapse entity or entity id (as a string)
        :param targetVersion: optionally specify the version of the entity
        :param wasExecuted:   boolean indicating whether the entity represents code that was executed to produce the result
        """
        reference = {'targetId': id_of(target)}
        if targetVersion:
            reference['targetVersionNumber'] = int(targetVersion)
        else:
            # No explicit version requested: if target is an Entity carrying a
            # versionNumber, pin to it; otherwise leave it to the platform to
            # resolve the current version from Synapse.
            try:
                reference['targetVersionNumber'] = target['versionNumber']
            except (KeyError, TypeError):
                pass
        used_entity = {
            'reference': reference,
            'wasExecuted': wasExecuted,
            'concreteType': 'org.sagebionetworks.repo.model.provenance.UsedEntity',
        }
        self['used'].append(used_entity)
# Example #13 (score: 0)
def create_supertable_leaderboard(evaluation, leaderboard_columns):
    """
    Create the leaderboard using a supertable, a markdown extension that dynamically
    builds a table by querying submissions. Because the supertable re-queries whenever
    the page is rendered, this step only has to be done once.
    """
    uri_base = urllib.quote_plus("/evaluation/submission/query")
    # it's incredibly picky that the equals sign here has to be urlencoded, but
    # the later equals signs CAN'T be urlencoded.
    query = urllib.quote_plus('query=select * from evaluation_%s where status=="SCORED"' %
                              utils.id_of(evaluation))
    params = [('paging', 'true'), ('queryTableResults', 'true'),
              ('showIfLoggedInOnly', 'false'), ('pageSize', '25'),
              ('showRowNumber', 'false'), ('jsonResultsKeyName', 'rows')]

    # Columns specifications have 4 fields: renderer, display name, column name, sort
    # Renderer and sort are usually 'none' and 'NONE'.
    for index, column_spec in enumerate(leaderboard_columns):
        fields = {'renderer': 'none', 'sort': 'NONE'}
        fields.update(column_spec)
        if 'display_name' not in fields:
            fields['display_name'] = fields['name']
        config_value = "{renderer},{display_name},{name};,{sort}".format(**fields)
        params.append(('columnConfig%s' % index, config_value))

    encoded_params = ["%s=%s" % (key, urllib.quote_plus(value))
                      for key, value in params]
    return ("${supertable?path=" + uri_base + "%3F" + query +
            "&" + "&".join(encoded_params) + "}")
def list_evaluations(project):
    print '\n\nEvaluations for project: ', utils.id_of(project)
    print '-' * 60

    evaluations = syn.getEvaluationByContentSource(project)
    for evaluation in evaluations:
        print "Evaluation: %s" % evaluation.id, evaluation.name.encode('utf-8')
def list_evaluations(project):
    print '\n\nEvaluations for project: ', utils.id_of(project)
    print '-' * 60

    evaluations = syn.getEvaluationByContentSource(project)
    for evaluation in evaluations:
        print "Evaluation: %s" % evaluation.id, evaluation.name.encode('utf-8')
# Example #16 (score: 0)
    def __init__(self,
                 properties=None,
                 annotations=None,
                 local_state=None,
                 parent=None,
                 **kwargs):
        ## Initialize an Entity from separate properties / annotations /
        ## local_state mappings plus arbitrary keyword overrides; resolves
        ## parentId from ``parent`` and defaults entityType from the class.

        if properties:
            if isinstance(properties, collections.Mapping):
                ## annotations nested inside properties are hoisted into the
                ## annotations dict so the two stores stay separate
                if 'annotations' in properties and isinstance(
                        properties['annotations'], collections.Mapping):
                    annotations.update(properties['annotations'])
                    del properties['annotations']
                self.__dict__['properties'].update(properties)
            else:
                raise Exception('Unknown argument type: properties is a %s' %
                                str(type(properties)))

        if annotations:
            if isinstance(annotations, collections.Mapping):
                self.__dict__['annotations'].update(annotations)
            ## a string is taken to be the annotations' own synapse ID
            elif isinstance(annotations, basestring):
                self.properties['annotations'] = annotations
            else:
                raise Exception('Unknown argument type: annotations is a %s' %
                                str(type(annotations)))

        if local_state:
            if isinstance(local_state, collections.Mapping):
                self.local_state(local_state)
            else:
                raise Exception('Unknown argument type: local_state is a %s' %
                                str(type(local_state)))

        ## ensure every declared local key exists, even if unset
        for key in self.__class__._local_keys:
            if key not in self.__dict__:
                self.__dict__[key] = None

        # Extract parentId from parent
        if 'parentId' not in kwargs:
            try:
                if parent: kwargs['parentId'] = id_of(parent)
            except Exception:
                ## distinguish "parent exists but was never stored" from
                ## other id-resolution failures for a clearer error message
                if parent and isinstance(parent,
                                         Entity) and 'id' not in parent:
                    raise Exception(
                        'Couldn\'t find \'id\' of parent. Has it been stored in Synapse?'
                    )
                else:
                    raise Exception('Couldn\'t find \'id\' of parent.')

        # Note: that this will work properly if derived classes declare their
        # internal state variable *before* invoking super(...).__init__(...)
        for key, value in kwargs.items():
            self.__setitem__(key, value)

        if 'entityType' not in self:
            self['entityType'] = self.__class__._synapse_entity_type
def test_id_of():
    """id_of normalizes scalars and dicts to string IDs and honors alias keys."""
    ## scalars
    assert_equals(utils.id_of(1), '1')
    assert_equals(utils.id_of('syn12345'), 'syn12345')

    ## dicts, directly and via a nested 'properties' mapping
    assert_equals(utils.id_of({'foo': 1, 'id': 123}), '123')
    assert_raises(ValueError, utils.id_of, {'foo': 1, 'idzz': 123})
    assert_equals(utils.id_of({'properties': {'id': 123}}), '123')
    assert_raises(ValueError, utils.id_of, {'properties': {'qq': 123}})
    assert_raises(ValueError, utils.id_of, object())

    class Foo:
        def __init__(self, id_attr_name, id):
            self.properties = {id_attr_name: id}

    ## 'ownerId' and 'tableId' are accepted aliases for 'id'
    for attr_name in ['id', 'ownerId', 'tableId']:
        assert_equals(utils.id_of(Foo(attr_name, 123)), '123')
def test_id_of():
    """Verify id_of stringifies IDs found in scalars, dicts, and objects."""
    assert_equals(utils.id_of(1), '1')
    assert_equals(utils.id_of('syn12345'), 'syn12345')
    assert_equals(utils.id_of({'foo': 1, 'id': 123}), '123')
    # unknown keys must not be mistaken for an id
    assert_raises(ValueError, utils.id_of, {'foo': 1, 'idzz': 123})
    assert_equals(utils.id_of({'properties': {'id': 123}}), '123')
    assert_raises(ValueError, utils.id_of, {'properties': {'qq': 123}})
    assert_raises(ValueError, utils.id_of, object())

    class Foo:
        def __init__(self, id_attr_name, id):
            self.properties = {id_attr_name: id}

    # each accepted id attribute alias resolves to the same string ID
    accepted_aliases = ['id', 'ownerId', 'tableId']
    for alias in accepted_aliases:
        instance = Foo(alias, 123)
        assert_equals(utils.id_of(instance), '123')
# Example #19 (score: 0)
 def removeColumn(self, column):
     """
     Remove a column from this schema.

     :param column: a column object or its ID
     :raises ValueError: if ``column`` is neither a column ID nor a queued Column object
     """
     if isinstance(column, basestring) or isinstance(column, int) or hasattr(column, 'id'):
         self.properties.columnIds.remove(utils.id_of(column))
     elif isinstance(column, Column) and self.columns_to_store:
         self.columns_to_store.remove(column)
     else:
         ## the original constructed the ValueError but never raised it, and
         ## used '+' where a %-format was intended (leaving a literal '%s')
         raise ValueError("Can't remove column %s" % unicode(column))
def archive(evaluation, destination=None, name=None, query=None):
    """
    Archive the submissions for the given evaluation queue and store them in the destination synapse folder.

    Builds a gzipped tar of all matching submission files plus a
    submission_metadata.csv (which also records each file's prefixed name
    inside the archive), then stores the tar in Synapse.

    :param evaluation: a synapse evaluation queue or its ID
    :param destination: a synapse folder or its ID
    :param name: name for the archive file; defaults to submissions_[EVAL_ID].tgz
    :param query: a query that will return the desired submissions. At least the ID must be returned.
                  defaults to _select * from evaluation_[EVAL_ID] where status=="SCORED"_.
    :returns: the Synapse ID of the stored archive entity
    """
    tempdir = tempfile.mkdtemp()
    archive_dirname = "submissions_%s" % utils.id_of(evaluation)

    if not query:
        query = 'select * from evaluation_%s where status=="SCORED"' % utils.id_of(evaluation)

    ## for each submission, download it's associated file and write a line of metadata
    results = Query(query=query)
    # fail fast if the query doesn't return the objectId column used below
    if "objectId" not in results.headers:
        raise ValueError('Can\'t find the required field "objectId" in the results of the query: "{0}"'.format(query))
    if not name:
        name = "submissions_%s.tgz" % utils.id_of(evaluation)
    tar_path = os.path.join(tempdir, name)
    print "creating tar at:", tar_path
    print results.headers
    with tarfile.open(tar_path, mode="w:gz") as archive:
        with open(os.path.join(tempdir, "submission_metadata.csv"), "w") as f:
            # header row: all query columns plus the archived filename
            f.write((",".join(hdr for hdr in (results.headers + ["filename"])) + "\n").encode("utf-8"))
            for result in results:
                ## retrieve file into cache and copy it to destination
                submission = syn.getSubmission(result[results.headers.index("objectId")])
                # prefix with the submission ID so equal filenames don't collide
                prefixed_filename = submission.id + "_" + os.path.basename(submission.filePath)
                archive.add(submission.filePath, arcname=os.path.join(archive_dirname, prefixed_filename))
                line = (",".join(unicode(item) for item in (result + [prefixed_filename]))).encode("utf-8")
                print line
                f.write(line + "\n")
        # the metadata .csv goes into the tar alongside the submission files
        archive.add(
            name=os.path.join(tempdir, "submission_metadata.csv"),
            arcname=os.path.join(archive_dirname, "submission_metadata.csv"),
        )

    entity = syn.store(File(tar_path, parent=destination), evaluation_id=utils.id_of(evaluation))
    print "created:", entity.id, entity.name
    return entity.id
# Example #21 (score: 0)
    def __init__(self, **kwargs):
        """
        Construct a Wiki from keyword arguments; an ``owner`` (the object the
        wiki page is attached to) is required.

        :raises ValueError: if no owner is supplied
        """
        #Verify that the parameters are correct
        if not 'owner' in kwargs:
            sys.stderr.write('Wiki constructor must have an owner specified')
            ## carry the message on the exception itself; the bare
            ## `raise ValueError` lost it for any caller catching the error
            raise ValueError('Wiki constructor must have an owner specified')

        super(Wiki, self).__init__(kwargs)
        ## owner is normalized into ownerType/ownerId and then dropped
        self.ownerType = guess_object_type(self.owner)
        self.ownerId = id_of(self.owner)
        del self['owner']
    def __init__(self, properties=None, annotations=None, local_state=None, parent=None, **kwargs):
        ## Initialize an Entity from separate properties / annotations /
        ## local_state mappings plus arbitrary keyword overrides; resolves
        ## parentId from ``parent``, defaults concreteType from the class,
        ## and enforces that non-Project entities have a parent.

        if properties:
            if isinstance(properties, collections.Mapping):
                ## annotations nested inside properties are hoisted into the
                ## annotations dict so the two stores stay separate
                if "annotations" in properties and isinstance(properties["annotations"], collections.Mapping):
                    annotations.update(properties["annotations"])
                    del properties["annotations"]
                self.__dict__["properties"].update(properties)
            else:
                raise SynapseMalformedEntityError("Unknown argument type: properties is a %s" % str(type(properties)))

        if annotations:
            if isinstance(annotations, collections.Mapping):
                self.__dict__["annotations"].update(annotations)
            ## a string is taken to be the annotations' own synapse ID
            elif isinstance(annotations, str):
                self.properties["annotations"] = annotations
            else:
                raise SynapseMalformedEntityError("Unknown argument type: annotations is a %s" % str(type(annotations)))

        if local_state:
            if isinstance(local_state, collections.Mapping):
                self.local_state(local_state)
            else:
                raise SynapseMalformedEntityError("Unknown argument type: local_state is a %s" % str(type(local_state)))

        ## ensure every declared local key exists, even if unset
        for key in self.__class__._local_keys:
            if key not in self.__dict__:
                self.__dict__[key] = None

        # Extract parentId from parent
        if "parentId" not in kwargs:
            if parent:
                try:
                    kwargs["parentId"] = id_of(parent)
                except Exception:
                    ## distinguish "parent exists but was never stored" from
                    ## other id-resolution failures for a clearer message
                    if isinstance(parent, Entity) and "id" not in parent:
                        raise SynapseMalformedEntityError(
                            "Couldn't find 'id' of parent.  Has it been stored in Synapse?"
                        )
                    else:
                        raise SynapseMalformedEntityError("Couldn't find 'id' of parent.")

        # Note: that this will work properly if derived classes declare their
        # internal state variable *before* invoking super(...).__init__(...)
        for key, value in six.iteritems(kwargs):
            self.__setitem__(key, value)

        if "concreteType" not in self:
            self["concreteType"] = self.__class__._synapse_entity_type

        ## Only project can be top-level. All other entity types require parentId
        ## don't enforce this for generic Entity
        if "parentId" not in self and not isinstance(self, Project) and not type(self) == Entity:
            raise SynapseMalformedEntityError("Entities of type %s must have a parentId." % type(self))
# Example #23 (score: 0)
 def removeColumn(self, column):
     """
     Remove a column from this schema.

     :param column: a column object or its ID
     :raises ValueError: if ``column`` is neither a column ID nor a queued Column object
     """
     if isinstance(column, basestring) or isinstance(
             column, int) or hasattr(column, 'id'):
         self.properties.columnIds.remove(utils.id_of(column))
     elif isinstance(column, Column) and self.columns_to_store:
         self.columns_to_store.remove(column)
     else:
         ## the original constructed the ValueError but never raised it, and
         ## used '+' where a %-format was intended (leaving a literal '%s')
         raise ValueError("Can't remove column %s" % unicode(column))
# Example #24 (score: 0)
 def addColumn(self, column):
     """
     Append a column to this schema.

     :param column: a column object or its ID
     :raises ValueError: if ``column`` is neither a column ID nor a Column object
     """
     looks_like_id = (isinstance(column, basestring)
                      or isinstance(column, int)
                      or hasattr(column, 'id'))
     if looks_like_id:
         # already-stored columns are referenced by ID
         self.properties.columnIds.append(utils.id_of(column))
     elif isinstance(column, Column):
         # unstored Column objects are queued to be created with the schema
         if not self.__dict__.get('columns_to_store', None):
             self.__dict__['columns_to_store'] = []
         self.__dict__['columns_to_store'].append(column)
     else:
         raise ValueError("Not a column? %s" % unicode(column))
def test_store_table_datetime():
    """Round-trip a datetime value through a single-column DATE table."""
    # millisecond precision is the finest Synapse dates preserve
    now = datetime.fromtimestamp(round(time.time(), 3))
    date_column = Column(name="testerino", columnType='DATE')
    schema = syn.store(Schema("testTable", [date_column], project))
    syn.store(Table(schema, RowSet(rows=[Row([now])], schema=schema)))

    query_result = syn.tableQuery("select * from %s" % id_of(schema),
                                  resultsAs="rowset")
    stored_value = query_result.rowset['rows'][0]['values'][0]
    assert_equals(now, stored_value)
# Example #26 (score: 0)
def syncFromSynapse(syn, entity, path=None, ifcollision='overwrite.local', allFiles = None):
    """Synchronizes all the files in a folder (including subfolders) from Synapse.

    :param syn:    A synapse object as obtained with syn = synapseclient.login()

    :param entity:  A Synapse ID, a Synapse Entity object of type folder or project.

    :param path: An optional path where the file hierarchy will be
                 reproduced.  If not specified the files will by default
                 be placed in the synapseCache.

    :param ifcollision:   Determines how to handle file collisions.
                          May be "overwrite.local", "keep.local", or "keep.both".
                          Defaults to "overwrite.local".

    :param allFiles: accumulator list used internally across recursive calls.

    :returns: list of entities (files, tables, links)

    This function will crawl all subfolders of the project/folder
    specified by `entity` and download all files that have not already
    been downloaded.  If there are newer files in Synapse (or a local
    file has been edited outside of the cache) since the last download
    then the local file will be replaced by the new file unless
    ifcollision is changed.

    Example::
    Download and print the paths of all downloaded files::

        entities = syncFromSynapse(syn, "syn1234")
        for f in entities:
            print(f.path)
    """
    ## avoid a shared mutable default argument
    if allFiles is None:
        allFiles = []
    parent_id = id_of(entity)
    results = syn.chunkedQuery("select id, name, nodeType from entity where entity.parentId=='%s'" % parent_id)
    for result in results:
        if not is_container(result):
            ## plain file: download it (into `path` when given) and collect it
            allFiles.append(syn.get(result['entity.id'],
                                    downloadLocation=path,
                                    ifcollision=ifcollision))
            continue
        ## container: mirror the directory locally (only when syncing outside
        ## the cache), then recurse into it
        if path is None:
            child_path = None
        else:
            child_path = os.path.join(path, result['entity.name'])
            try:
                os.mkdir(child_path)
            except OSError as err:
                ## an already-existing directory is fine; anything else is not
                if err.errno != errno.EEXIST:
                    raise
            print('making dir', child_path)
        syncFromSynapse(syn, result['entity.id'], child_path, ifcollision, allFiles)
    return allFiles
Beispiel #27
0
    def __init__(self, properties=None, annotations=None, local_state=None, parent=None, **kwargs):
        """
        Initialize an Entity from properties, annotations and local state.

        :param properties:  a Mapping of Synapse properties; an embedded
                            'annotations' Mapping is moved into the annotations
        :param annotations: a Mapping of annotations, or a string stored as
                            the 'annotations' property (presumably a URI --
                            TODO confirm)
        :param local_state: a Mapping of client-side-only state
        :param parent:      an Entity or ID from which 'parentId' is derived
        :raises SynapseMalformedEntityError: on unrecognized argument types,
                an unresolvable parent, or a missing parentId
        """

        if properties:
            if isinstance(properties, collections.Mapping):
                ## an 'annotations' Mapping nested in properties belongs with
                ## the annotations argument, not the properties
                # NOTE(review): if annotations is None while properties contains an
                # 'annotations' Mapping, this raises AttributeError -- confirm callers
                if 'annotations' in properties and isinstance(properties['annotations'], collections.Mapping):
                    annotations.update(properties['annotations'])
                    del properties['annotations']
                self.__dict__['properties'].update(properties)
            else:
                raise SynapseMalformedEntityError('Unknown argument type: properties is a %s' % str(type(properties)))

        if annotations:
            if isinstance(annotations, collections.Mapping):
                self.__dict__['annotations'].update(annotations)
            elif isinstance(annotations, basestring):
                ## a bare string is stored as the 'annotations' property itself
                self.properties['annotations'] = annotations
            else:
                raise SynapseMalformedEntityError('Unknown argument type: annotations is a %s' % str(type(annotations)))

        if local_state:
            if isinstance(local_state, collections.Mapping):
                self.local_state(local_state)
            else:
                raise SynapseMalformedEntityError('Unknown argument type: local_state is a %s' % str(type(local_state)))

        ## ensure every declared local key exists on the instance, even if unset
        for key in self.__class__._local_keys:
            if key not in self.__dict__:
                self.__dict__[key] = None

        # Extract parentId from parent
        if 'parentId' not in kwargs:
            if parent:
                try:
                    kwargs['parentId'] = id_of(parent)
                except Exception:
                    ## distinguish "parent exists but was never stored" from
                    ## "parent has no discoverable id at all"
                    if isinstance(parent, Entity) and 'id' not in parent:
                        raise SynapseMalformedEntityError("Couldn't find 'id' of parent.  Has it been stored in Synapse?")
                    else:
                        raise SynapseMalformedEntityError("Couldn't find 'id' of parent.")

        # Note: that this will work properly if derived classes declare their
        # internal state variable *before* invoking super(...).__init__(...)
        for key, value in kwargs.items():
            self.__setitem__(key, value)

        if 'concreteType' not in self:
            self['concreteType'] = self.__class__._synapse_entity_type

        ## Only project can be top-level. All other entity types require parentId
        ## don't enforce this for generic Entity
        if 'parentId' not in self and not isinstance(self, Project) and not type(self)==Entity:
            raise SynapseMalformedEntityError("Entities of type %s must have a parentId." % type(self))
Beispiel #28
0
    def setup(self):
        """Build canned fixtures and patch syn's submission helpers before each test."""
        self.eval_id = '9090'
        self.contributors = None
        self.entity = {'versionNumber': 7,
                       'id': 'syn1009',
                       'etag': 'etag',
                       'name': 'entity name'}
        self.eval = {'contentSource': self.entity['id'],
                     'createdOn': '2013-11-06T06:04:26.789Z',
                     'etag': '86485ea1-8c89-4f24-a0a4-2f63bc011091',
                     'id': self.eval_id,
                     'name': 'test evaluation',
                     'ownerId': '1560252',
                     'status': 'OPEN',
                     'submissionReceiptMessage': 'Your submission has been received.!'}
        self.team = {'id': 5, 'name': 'Team Blue'}
        ## the submission fixture is derived from the entity/team fixtures above
        self.submission = {'id': 123,
                           'evaluationId': self.eval_id,
                           'name': self.entity['name'],
                           'entityId': self.entity['id'],
                           'versionNumber': self.entity['versionNumber'],
                           'teamId': id_of(self.team['id']),
                           'contributors': self.contributors,
                           'submitterAlias': self.team['name']}
        self.eligibility_hash = 23

        ## patch every syn method this suite exercises, then start the patches
        self.patch_private_submit = patch.object(
            syn, "_submit", return_value=self.submission)
        self.patch_getEvaluation = patch.object(
            syn, "getEvaluation", return_value=self.eval)
        self.patch_get = patch.object(syn, "get", return_value=self.entity)
        self.patch_getTeam = patch.object(syn, "getTeam", return_value=self.team)
        self.patch_get_contributors = patch.object(
            syn, "_get_contributors",
            return_value=(self.contributors, self.eligibility_hash))

        self.mock_private_submit = self.patch_private_submit.start()
        self.mock_getEvaluation = self.patch_getEvaluation.start()
        self.mock_get = self.patch_get.start()
        self.mock_getTeam = self.patch_getTeam.start()
        self.mock_get_contributors = self.patch_get_contributors.start()
Beispiel #29
0
 def __init__(self, name=None, columns=None, parent=None, properties=None, annotations=None, local_state=None, **kwargs):
     """Construct a table Schema, optionally pre-populating its columns."""
     self.properties.setdefault('columnIds', [])
     if name:
         kwargs['name'] = name
     for column in (columns or []):
         if isinstance(column, (basestring, int)) or hasattr(column, 'id'):
             ## already-stored columns are referenced by ID
             kwargs.setdefault('columnIds', []).append(utils.id_of(column))
         elif isinstance(column, Column):
             ## unstored Column objects get created when the schema is stored
             kwargs.setdefault('columns_to_store', []).append(column)
         else:
             raise ValueError("Not a column? %s" % unicode(column))
     super(Schema, self).__init__(concreteType=Schema._synapse_entity_type, properties=properties,
                                  annotations=annotations, local_state=local_state, parent=parent, **kwargs)
Beispiel #30
0
 def addColumn(self, column):
     """
     :param column: a column object or its ID
     """
     ## an ID (or anything carrying an 'id') is appended to the schema's
     ## columnIds; an unstored Column is queued for creation at store time
     id_like = isinstance(column, (basestring, int)) or hasattr(column, 'id')
     if id_like:
         self.properties.columnIds.append(utils.id_of(column))
     elif isinstance(column, Column):
         store_list = self.__dict__.get('columns_to_store', None)
         if not store_list:
             store_list = self.__dict__['columns_to_store'] = []
         store_list.append(column)
     else:
         raise ValueError("Not a column? %s" % unicode(column))
Beispiel #31
0
    def __init__(self, columns=None, schema=None, **kwargs):
        """
        Build a RowSet, deriving headers and tableId from columns/schema when absent.

        :param columns: a list of Column objects from which headers are derived
        :param schema:  a table Schema supplying both headers and the table ID
        :raises ValueError: if the table ID or the column headers cannot be determined
        """
        ## idiom fix: use "x not in y" rather than "not x in y"
        if 'headers' not in kwargs:
            if columns:
                kwargs.setdefault('headers', []).extend(
                    [SelectColumn.from_column(column) for column in columns])
            elif schema and isinstance(schema, Schema):
                ## renamed loop variable so it no longer shadows builtin id()
                kwargs.setdefault('headers', []).extend(
                    [SelectColumn(id=col_id) for col_id in schema["columnIds"]])
        if 'tableId' not in kwargs and schema:
            kwargs['tableId'] = utils.id_of(schema)
        if not kwargs.get('tableId', None):
            raise ValueError("Table schema ID must be defined to create a RowSet")
        if not kwargs.get('headers', None):
            raise ValueError("Column headers must be defined to create a RowSet")

        super(RowSet, self).__init__(kwargs)
Beispiel #32
0
 def __init__(self,
              targetId=None,
              targetVersion=None,
              parent=None,
              properties=None,
              annotations=None,
              local_state=None,
              **kwargs):
     """Create a Link entity pointing at targetId (optionally pinned to a version)."""
     if targetId is not None:
         link = {'targetId': utils.id_of(targetId)}
         if targetVersion is not None:
             link['targetVersionNumber'] = targetVersion
         kwargs['linksTo'] = link
     elif properties is not None and 'linksTo' in properties:
         ## linksTo was already supplied through properties; nothing to derive
         pass
     else:
         raise SynapseMalformedEntityError("Must provide a target id")
     super(Link, self).__init__(concreteType=Link._synapse_entity_type,
                                properties=properties,
                                annotations=annotations,
                                local_state=local_state,
                                parent=parent,
                                **kwargs)
    def setup(self):
        """Install fresh fixtures and mocks over syn's submission machinery."""
        self.eval_id = '9090'
        self.contributors = None
        self.entity = {
            'versionNumber': 7, 'id': 'syn1009',
            'etag': 'etag', 'name': 'entity name',
        }
        self.eval = {
            'contentSource': self.entity['id'],
            'createdOn': '2013-11-06T06:04:26.789Z',
            'etag': '86485ea1-8c89-4f24-a0a4-2f63bc011091',
            'id': self.eval_id,
            'name': 'test evaluation',
            'ownerId': '1560252',
            'status': 'OPEN',
            'submissionReceiptMessage': 'Your submission has been received.!',
        }
        self.team = {'id': 5, 'name': 'Team Blue'}
        ## submission fixture is assembled from the entity/team fixtures above
        self.submission = {
            'id': 123,
            'evaluationId': self.eval_id,
            'name': self.entity['name'],
            'entityId': self.entity['id'],
            'versionNumber': self.entity['versionNumber'],
            'teamId': id_of(self.team['id']),
            'contributors': self.contributors,
            'submitterAlias': self.team['name'],
        }
        self.eligibility_hash = 23

        ## create patches for every syn call under test, then start them all
        self.patch_private_submit = patch.object(syn, "_submit",
                                                 return_value=self.submission)
        self.patch_getEvaluation = patch.object(syn, "getEvaluation",
                                                return_value=self.eval)
        self.patch_get = patch.object(syn, "get", return_value=self.entity)
        self.patch_getTeam = patch.object(syn, "getTeam", return_value=self.team)
        self.patch_get_contributors = patch.object(
            syn, "_get_contributors",
            return_value=(self.contributors, self.eligibility_hash))

        self.mock_private_submit = self.patch_private_submit.start()
        self.mock_getEvaluation = self.patch_getEvaluation.start()
        self.mock_get = self.patch_get.start()
        self.mock_getTeam = self.patch_getTeam.start()
        self.mock_get_contributors = self.patch_get_contributors.start()
Beispiel #34
0
    def __init__(self, properties=None, annotations=None, local_state=None, parent=None, **kwargs):
        """
        Initialize an Entity from properties, annotations and local state.

        :param properties:  a Mapping of Synapse properties; an embedded
                            'annotations' Mapping is moved into the annotations
        :param annotations: a Mapping of annotations, or a string stored as
                            the 'annotations' property (presumably a URI --
                            TODO confirm)
        :param local_state: a Mapping of client-side-only state
        :param parent:      an Entity or ID from which 'parentId' is derived
        :raises SynapseMalformedEntityError: on unrecognized argument types or
                an unresolvable parent
        """

        if properties:
            if isinstance(properties, collections.Mapping):
                ## an 'annotations' Mapping nested in properties belongs with
                ## the annotations argument, not the properties
                # NOTE(review): if annotations is None while properties contains an
                # 'annotations' Mapping, this raises AttributeError -- confirm callers
                if 'annotations' in properties and isinstance(properties['annotations'], collections.Mapping):
                    annotations.update(properties['annotations'])
                    del properties['annotations']
                self.__dict__['properties'].update(properties)
            else:
                raise SynapseMalformedEntityError('Unknown argument type: properties is a %s' % str(type(properties)))

        if annotations:
            if isinstance(annotations, collections.Mapping):
                self.__dict__['annotations'].update(annotations)
            elif isinstance(annotations, basestring):
                ## a bare string is stored as the 'annotations' property itself
                self.properties['annotations'] = annotations
            else:
                raise SynapseMalformedEntityError('Unknown argument type: annotations is a %s' % str(type(annotations)))

        if local_state:
            if isinstance(local_state, collections.Mapping):
                self.local_state(local_state)
            else:
                raise SynapseMalformedEntityError('Unknown argument type: local_state is a %s' % str(type(local_state)))

        ## ensure every declared local key exists on the instance, even if unset
        for key in self.__class__._local_keys:
            if key not in self.__dict__:
                self.__dict__[key] = None

        # Extract parentId from parent
        if 'parentId' not in kwargs:
            try:
                if parent: kwargs['parentId'] = id_of(parent)
            except Exception:
                ## distinguish "parent exists but was never stored" from
                ## "parent has no discoverable id at all"
                if parent and isinstance(parent, Entity) and 'id' not in parent:
                    raise SynapseMalformedEntityError("Couldn't find 'id' of parent.  Has it been stored in Synapse?")
                else:
                    raise SynapseMalformedEntityError("Couldn't find 'id' of parent.")

        # Note: that this will work properly if derived classes declare their
        # internal state variable *before* invoking super(...).__init__(...)
        for key, value in kwargs.items():
            self.__setitem__(key, value)

        if 'concreteType' not in self:
            self['concreteType'] = self.__class__._synapse_entity_type
Beispiel #35
0
    def __init__(self, **kwargs):
        """
        Build a Wiki, normalizing attachment file handles.

        :param kwargs: must include 'owner' (the entity the wiki is attached
                       to, or its ID); may include 'fileHandles', which are
                       folded into 'attachmentFileHandleIds'.
        :raises ValueError: if no owner is given
        """
        ## idiom fix: use "x not in y" rather than "not x in y"
        if 'owner' not in kwargs:
            raise ValueError('Wiki constructor must have an owner specified')

        # Initialize the file handle list to be an empty list
        if 'attachmentFileHandleIds' not in kwargs:
            kwargs['attachmentFileHandleIds'] = []

        # Move the 'fileHandles' into the proper (wordier) bucket
        if 'fileHandles' in kwargs:
            kwargs['attachmentFileHandleIds'].extend(kwargs.pop('fileHandles'))

        super(Wiki, self).__init__(kwargs)
        ## 'owner' is a convenience argument; only ownerId is kept
        self.ownerId = id_of(self.owner)
        del self['owner']
Beispiel #36
0
    def __init__(self, columns=None, schema=None, **kwargs):
        """
        Build a RowSet, deriving headers and tableId from columns/schema when absent.

        :param columns: a list of Column objects from which headers are derived
        :param schema:  a table Schema supplying both headers and the table ID
        :raises ValueError: if the table ID or the column headers cannot be determined
        """
        ## idiom fix: use "x not in y" rather than "not x in y"
        if 'headers' not in kwargs:
            if columns:
                kwargs.setdefault('headers', []).extend(
                    [SelectColumn.from_column(column) for column in columns])
            elif schema and isinstance(schema, Schema):
                ## renamed loop variable so it no longer shadows builtin id()
                kwargs.setdefault('headers', []).extend(
                    [SelectColumn(id=col_id) for col_id in schema["columnIds"]])
        if 'tableId' not in kwargs and schema:
            kwargs['tableId'] = utils.id_of(schema)
        if not kwargs.get('tableId', None):
            raise ValueError(
                "Table schema ID must be defined to create a RowSet")
        if not kwargs.get('headers', None):
            raise ValueError(
                "Column headers must be defined to create a RowSet")

        super(RowSet, self).__init__(kwargs)
Beispiel #37
0
    def __init__(self, **kwargs):
        """
        Build a Wiki, normalizing attachment file handles.

        :param kwargs: must include 'owner' (the entity the wiki is attached
                       to, or its ID); may include 'fileHandles', which are
                       folded into 'attachmentFileHandleIds'.
        :raises ValueError: if no owner is given
        """
        ## idiom fix: use "x not in y" rather than "not x in y"
        if 'owner' not in kwargs:
            raise ValueError('Wiki constructor must have an owner specified')

        # Initialize the file handle list to be an empty list
        if 'attachmentFileHandleIds' not in kwargs:
            kwargs['attachmentFileHandleIds'] = []

        # Move the 'fileHandles' into the proper (wordier) bucket
        if 'fileHandles' in kwargs:
            kwargs['attachmentFileHandleIds'].extend(kwargs.pop('fileHandles'))

        super(Wiki, self).__init__(kwargs)
        ## 'owner' is a convenience argument; only ownerId is kept
        self.ownerId = id_of(self.owner)
        del self['owner']
 def usedEntity(self, target, targetVersion=None, wasExecuted=False):
     """
     Append a UsedEntity reference to this activity's 'used' list.

     :param target:        either a synapse entity or entity id (as a string)
     :param targetVersion: optionally specify the version of the entity
     :param wasExecuted:   boolean indicating whether the entity represents code that was executed to produce the result
     """
     reference = {'targetId': id_of(target)}
     if targetVersion:
         reference['targetVersionNumber'] = int(targetVersion)
     else:
         try:
             ## when given an Entity object, pin the reference to its version
             reference['targetVersionNumber'] = target['versionNumber']
         except (KeyError, TypeError):
             ## otherwise let the platform resolve the current version
             pass
     self['used'].append({
         'reference': reference,
         'wasExecuted': wasExecuted,
         'concreteType': 'org.sagebionetworks.repo.model.provenance.UsedEntity',
     })
def validate(evaluation,
             send_messages=False,
             notifications=False,
             dry_run=False):
    """
    It may be convenient to validate submissions in one pass before scoring
    them, especially if scoring takes a long time.
    """
    print "\n\nValidating", utils.id_of(evaluation)
    print "-" * 60
    for submission, status in syn.getSubmissionBundles(evaluation, status='RECEIVED'):

        ## refetch the submission so that we get the file path
        ## to be later replaced by a "downloadFiles" flag on getSubmissionBundles
        submission = syn.getSubmission(submission)

        is_valid, validation_message = validate_submission(submission.filePath)
        print submission.id, validation_message
        if is_valid:
            status.status = "VALIDATED"
        else:
            status.status = "INVALID"

        if not dry_run:
            status = syn.store(status)

        ## send message AFTER storing status to ensure we don't get repeat messages
        if not is_valid and send_messages:
            profile = syn.getUserProfile(submission.userId)

            message = VALIDATION_TEMPLATE.format(
                username=profile.get('firstName', profile.get('userName', profile['ownerId'])),
                submission_id=submission.id,
                submission_name=submission.name,
                message=validation_message)

            response = syn.sendMessage(
                userIds=[submission.userId],
                messageSubject="Error validating Submission to "+CHALLENGE_NAME,
                messageBody=message)
            print "sent validation error message: ", unicode(response).encode('utf-8')
def archive(evaluation, destination=None, token=None, name=None, query=None):
    """
    Archive the submissions for the given evaluation queue and store them in the destination synapse folder.

    :param evaluation: a synapse evaluation queue or its ID
    :param destination: a synapse folder or its ID
    :param token: API token handed to the external sbg-download.py helper
                  (presumably a Seven Bridges token -- TODO confirm)
    :param name: unused in this implementation
    :param query: a query that will return the desired submissions. At least the ID must be returned.
                  defaults to _select * from evaluation_[EVAL_ID] where status=="SCORED"_.
    """
    ## map evaluation queue IDs to the challenge name used as the bucket subpath
    challenge = {'5877348':'FusionDetection','5952651':'IsoformQuantification'}
    if not query:
        query = 'select * from evaluation_%s where status=="SCORED"' % utils.id_of(evaluation)
    path = challenge[utils.id_of(evaluation)]
    ## for each submission, download its associated file and write a line of metadata
    results = Query(query=query)
    if 'objectId' not in results.headers:
        raise ValueError("Can't find the required field \"objectId\" in the results of the query: \"{0}\"".format(query))
    for result in results:
        #Check if the folder has already been created in synapse
        #(This is used as a tool to check submissions that have already been cached)
        new_map = []
        mapping = syn.get("syn7348150")
        submissionId = result[results.headers.index('objectId')]
        check = syn.query('select id,name from folder where parentId == "%s" and name == "%s"' % (destination,submissionId))
        if check['totalNumberOfResults']==0:
            os.mkdir(submissionId)
            submission = syn.getSubmission(submissionId, downloadFile=False)
            ## submissions without an externalURL hold the CWL file directly in Synapse
            if submission.entity.externalURL is None:
                submission = syn.getSubmission(submissionId, downloadLocation=submissionId)
                ## spaces in the filename would break the shell commands below
                newFilePath = submission.filePath.replace(' ', '_')
                shutil.move(submission.filePath,newFilePath)
                #Store CWL file in bucket
                os.system('gsutil cp -R %s gs://smc-rna-eval/entries/%s' % (submissionId,path))
                # NOTE(review): yaml.load on submitted files is unsafe for
                # untrusted input; consider yaml.safe_load
                with open(newFilePath,"r") as cwlfile:
                    docs = yaml.load(cwlfile)
                    merged = docs['$graph']
                    docker = []
                    ## collect every dockerPull referenced by the CWL tools
                    for tools in merged:
                        if tools['class'] == 'CommandLineTool':
                            if tools.get('requirements',None) is not None:
                                for i in tools['requirements']:
                                    if i.get('dockerPull',None) is not None:
                                        docker.append(i['dockerPull'])
                            if tools.get('hints', None) is not None:
                                for i in tools['hints']:
                                    if i.get('dockerPull',None) is not None:
                                        docker.append(i['dockerPull'])
                        if tools['class'] == 'Workflow':
                            hints = tools.get("hints",None)
                            if hints is not None:
                                for i in tools['hints']:
                                    if os.path.basename(i['class']) == "synData":
                                        temp = syn.get(i['entity'])
                                        #create synid and index mapping
                                        new_map.append([temp.id,"gs://smc-rna-eval/entries/%s/%s/%s" %(path,submissionId,temp.name)])
                                        #Store index files
                                        os.system('gsutil cp %s gs://smc-rna-eval/entries/%s/%s' % (temp.path,path,submissionId))
                os.system('rm -rf ~/.synapseCache/*')
            else:
                ## externalURL submissions are fetched through the sbg-download helper;
                ## the last URL segment is the task id
                if submission.entity.externalURL.endswith("/"):
                    submission.entity.externalURL = submission.entity.externalURL[:-1]
                taskId = submission.entity.externalURL.split("/")[-1]
                test = subprocess.check_call(["python", os.path.join(os.path.dirname(__file__),"../../SMC-RNA-Eval/sbg-download.py"), "--token", token, taskId, submissionId])
                os.system('gsutil cp -R %s gs://smc-rna-eval/entries/%s' % (submissionId,path))
                #Pull down docker containers
                with open("%s/submission.cwl" % submissionId,"r") as cwlfile:
                    docs = yaml.load(cwlfile)
                    # merged = docs['steps']
                    # docker = []
                    # for tools in merged:
                    #     for hint in tools['run']['hints']:
                    #         if hint['class'] == 'DockerRequirement':
                    #             docker.append(hint['dockerPull'])
                    #     for require in tools['run']['requirements']:
                    #         if require.get('requirements') is not None:
                    #             for i in require.get('requirements'):
                    #                 if i['class'] == 'DockerRequirement':
                    #                     docker.append(i['dockerPull'])
                    docker = []
                    for tools in docs['hints']:
                        if tools['class'] == "DockerRequirement":
                            docker.append(tools['dockerPull'])
            os.system('rm -rf %s' % submissionId)
            ## record synapse-id -> bucket-path rows in the mapping table
            if len(new_map) > 0:
                table = syn.store(Table(mapping, new_map))
            #Pull, save, and store docker containers
            docker = set(docker)
            for i in docker:
                ## ':' is not a valid filename character for the tarball
                fileName = os.path.basename(i).replace(":","_")
                os.system('sudo -i docker pull %s' % i)
                #os.system('sudo -i docker save %s' % i)
                os.system('sudo docker save -o %s.tar %s' %(fileName,i))
                os.system('sudo chmod a+r %s.tar' % fileName)
                os.system('gsutil cp %s.tar gs://smc-rna-eval/entries/%s/%s' % (fileName,path,submissionId))
                os.remove("%s.tar" % fileName)
            ## mark the submission archived by creating its folder in Synapse
            submission_parent = syn.store(Folder(submissionId,parent=destination))
def archive(evaluation, destination=None, name=None, query=None):
    """
    Archive the submissions for the given evaluation queue and store them in the destination synapse folder.

    :param evaluation: a synapse evaluation queue or its ID
    :param destination: a synapse folder or its ID
    :param name: unused in this implementation
    :param query: a query that will return the desired submissions. At least the ID must be returned.
                  defaults to _select * from evaluation_[EVAL_ID] where status=="VALIDATED"_.
    """
    ## map evaluation queue IDs to the challenge name used as the bucket subpath
    challenge = {
        '5877348': 'FusionDetection',
        '5952651': 'IsoformQuantification'
    }
    if not query:
        query = 'select * from evaluation_%s where status=="VALIDATED"' % utils.id_of(
            evaluation)
    path = challenge[utils.id_of(evaluation)]
    ## for each submission, download its associated file and write a line of metadata
    results = Query(query=query)
    if 'objectId' not in results.headers:
        raise ValueError(
            "Can't find the required field \"objectId\" in the results of the query: \"{0}\""
            .format(query))
    for result in results:
        #Check if the folder has already been created in synapse
        #(This is used as a tool to check submissions that have already been cached)
        submissionId = result[results.headers.index('objectId')]
        check = syn.query(
            'select id,name from folder where parentId == "%s" and name == "%s"'
            % (destination, submissionId))
        if check['totalNumberOfResults'] == 0:
            os.mkdir(submissionId)
            ## creating the folder up-front marks this submission as cached
            submission_parent = syn.store(
                Folder(submissionId, parent=destination))
            submission = syn.getSubmission(submissionId,
                                           downloadLocation=submissionId)
            ## spaces in the filename would break the shell commands below
            newFilePath = submission.filePath.replace(' ', '_')
            shutil.move(submission.filePath, newFilePath)
            #Store CWL file in bucket
            os.system('gsutil cp -R %s gs://smc-rna-cache/%s' %
                      (submissionId, path))
            # NOTE(review): yaml.load on submitted files is unsafe for
            # untrusted input; consider yaml.safe_load
            with open(newFilePath, "r") as cwlfile:
                docs = yaml.load(cwlfile)
                merged = docs['$graph']
                docker = []
                ## collect every dockerPull referenced by the CWL tools
                for tools in merged:
                    if tools['class'] == 'CommandLineTool':
                        if tools.get('requirements', None) is not None:
                            for i in tools['requirements']:
                                if i.get('dockerPull', None) is not None:
                                    docker.append(i['dockerPull'])
                    if tools['class'] == 'Workflow':
                        hints = tools.get("hints", None)
                        if hints is not None:
                            for i in tools['hints']:
                                if os.path.basename(i['class']) == "synData":
                                    temp = syn.get(i['entity'])
                                    #Store index files
                                    os.system(
                                        'gsutil cp %s gs://smc-rna-cache/%s/%s'
                                        % (temp.path, path, submissionId))
            os.system('rm -rf ~/.synapseCache/*')
            #Pull, save, and store docker containers
            docker = set(docker)
            for i in docker:
                os.system('sudo docker pull %s' % i)
                os.system('sudo docker save %s' % i)
                os.system('sudo docker save -o %s.tar %s' %
                          (os.path.basename(i), i))
                os.system('sudo chmod a+r %s.tar' % os.path.basename(i))
                os.system('gsutil cp %s.tar gs://smc-rna-cache/%s/%s' %
                          (os.path.basename(i), path, submissionId))
                os.remove("%s.tar" % os.path.basename(i))
            os.system('rm -rf %s' % submissionId)
Beispiel #42
0
def syncFromSynapse(syn,
                    entity,
                    path=None,
                    ifcollision='overwrite.local',
                    allFiles=None,
                    followLink=False):
    """Synchronizes all the files in a folder (including subfolders) from Synapse and adds a readme manifest with file
    metadata.

    :param syn:         A synapse object as obtained with syn = synapseclient.login()

    :param entity:      A Synapse ID, a Synapse Entity object of type file, folder or project.

    :param path:        An optional path where the file hierarchy will be reproduced. If not specified the files will by
                        default be placed in the synapseCache.

    :param ifcollision: Determines how to handle file collisions. Maybe "overwrite.local", "keep.local", or "keep.both".
                        Defaults to "overwrite.local".

    :param followLink:  Determines whether the link returns the target Entity.
                        Defaults to False

    :returns: list of entities (files, tables, links)

    This function will crawl all subfolders of the project/folder specified by `entity` and download all files that have
    not already been downloaded.  If there are newer files in Synapse (or a local file has been edited outside of the
    cache) since the last download then local the file will be replaced by the new file unless "ifcollision" is changed.

    If the files are being downloaded to a specific location outside of the Synapse cache a file
    (SYNAPSE_METADATA_MANIFEST.tsv) will also be added in the path that contains the metadata (annotations, storage
    location and provenance of all downloaded files).

    See also:
    - :py:func:`synapseutils.sync.syncToSynapse`

    Example:
    Download and print the paths of all downloaded files::

        entities = syncFromSynapse(syn, "syn1234")
        for f in entities:
            print(f.path)

    """
    # Accumulator is shared across recursive calls; create it only at the top level.
    allFiles = list() if allFiles is None else allFiles

    # A bare Synapse ID is resolved (and, if it is a file, downloaded) up front.
    if is_synapse_id(entity):
        entity = syn.get(entity,
                         downloadLocation=path,
                         ifcollision=ifcollision,
                         followLink=followLink)

    # Base case: a single file needs no traversal.
    if isinstance(entity, File):
        allFiles.append(entity)
        return allFiles

    entity_id = id_of(entity)
    if not is_container(entity):
        raise ValueError(
            "The provided id: %s is neither a container nor a File" %
            entity_id)

    # Walk the container's immediate children, recursing into sub-containers.
    for item in syn.getChildren(entity_id):
        if is_container(item):
            child_path = None
            if path is not None:
                # Mirror the Synapse folder hierarchy on disk.
                child_path = os.path.join(path, item['name'])
                try:
                    os.makedirs(child_path)
                except OSError as err:
                    # An already-existing directory is fine; anything else is not.
                    if err.errno != errno.EEXIST:
                        raise
            syncFromSynapse(syn,
                            item['id'],
                            child_path,
                            ifcollision,
                            allFiles,
                            followLink=followLink)
        else:
            fetched = syn.get(item['id'],
                              downloadLocation=path,
                              ifcollision=ifcollision,
                              followLink=followLink)
            # Non-File entities (e.g. tables) are fetched but not collected.
            if isinstance(fetched, File):
                allFiles.append(fetched)

    # When downloading outside the cache, drop a manifest describing the files.
    if path is not None:
        manifest_path = os.path.expanduser(
            os.path.normcase(os.path.join(path, MANIFEST_FILENAME)))
        generateManifest(syn, allFiles, manifest_path)

    return allFiles
Beispiel #43
0
def syncFromSynapse(syn, entity, path=None, ifcollision='overwrite.local', allFiles=None, followLink=False):
    """Synchronizes all the files in a folder (including subfolders) from Synapse and adds a readme manifest with file
    metadata.

    :param syn:         A synapse object as obtained with syn = synapseclient.login()

    :param entity:      A Synapse ID, a Synapse Entity object of type folder or project.

    :param path:        An optional path where the file hierarchy will be reproduced. If not specified the files will
                        by default be placed in the synapseCache.

    :param ifcollision: Determines how to handle file collisions.
                        May be "overwrite.local", "keep.local", or "keep.both".
                        Defaults to "overwrite.local".

    :param followLink:  Determines whether the link returns the target Entity.
                        Defaults to False

    :returns: list of entities (files, tables, links)

    This function will crawl all subfolders of the project/folder specified by `entity` and download all files that
    have not already been downloaded.  If there are newer files in Synapse (or a local file has been edited outside of
    the cache) since the last download then the local file will be replaced by the new file unless "ifcollision" is
    changed.

    If the files are being downloaded to a specific location outside of the Synapse cache a file
    (SYNAPSE_METADATA_MANIFEST.tsv) will also be added in the path that contains the metadata (annotations, storage
    location and provenance of all downloaded files).

    See also:
    - :py:func:`synapseutils.sync.syncToSynapse`

    Example:
    Download and print the paths of all downloaded files::

        entities = syncFromSynapse(syn, "syn1234")
        for f in entities:
            print(f.path)

    """
    # Accumulator is shared across recursive calls; create it only at the top level.
    if allFiles is None:
        allFiles = list()
    # Renamed from `id` to avoid shadowing the builtin.
    entity_id = id_of(entity)
    results = syn.chunkedQuery("select id, name, nodeType from entity where entity.parentId=='%s'" % entity_id)
    for result in results:
        if is_container(result):
            if path is not None:  # If we are downloading outside cache create directory.
                new_path = os.path.join(path, result['entity.name'])
                try:
                    os.mkdir(new_path)
                except OSError as err:
                    # An already-existing directory is fine; anything else is not.
                    if err.errno != errno.EEXIST:
                        raise
            else:
                new_path = None
            # Bug fix: propagate followLink so links in subfolders honor the caller's flag
            # (previously the recursive call silently reset it to the default False).
            syncFromSynapse(syn, result['entity.id'], new_path, ifcollision, allFiles, followLink=followLink)
        else:
            ent = syn.get(result['entity.id'], downloadLocation=path, ifcollision=ifcollision, followLink=followLink)
            allFiles.append(ent)

    if path is not None:  # If path is None files are stored in cache.
        filename = os.path.join(path, MANIFEST_FILENAME)
        filename = os.path.expanduser(os.path.normcase(filename))
        generateManifest(syn, allFiles, filename)
    return allFiles
def create_supertable_leaderboard(evaluation):
    """
    Create the leaderboard using a supertable, a markdown extension that dynamically
    builds a table by querying submissions. Because the supertable re-queries whenever
    the page is rendered, this step only has to be done once.
    """
    uri_base = urllib.quote_plus("/evaluation/submission/query")
    # it's incredibly picky that the equals sign here has to be urlencoded, but
    # the later equals signs CAN'T be urlencoded.
    query = urllib.quote_plus('query=select * from evaluation_%s where status=="SCORED"' % utils.id_of(evaluation))

    # Fixed widget settings, followed by one columnConfig entry per leaderboard column.
    params = [('paging', 'true'),
              ('queryTableResults', 'true'),
              ('showIfLoggedInOnly', 'false'),
              ('pageSize', '25'),
              ('showRowNumber', 'false'),
              ('jsonResultsKeyName', 'rows')]

    # Columns specifications have 4 fields: renderer, display name, column name, sort.
    # Renderer and sort are usually 'none' and 'NONE'.
    for index, column in enumerate(LEADERBOARD_COLUMNS):
        fields = dict(renderer='none', sort='NONE')
        fields.update(column)
        params.append(('columnConfig%s' % index,
                       "{renderer},{display_name},{column_name};,{sort}".format(**fields)))

    encoded_params = "&".join("%s=%s" % (key, urllib.quote_plus(value))
                              for key, value in params)
    # %%3F renders as the url-encoded '?' separating path from query.
    return "${supertable?path=%s%%3F%s&%s}" % (uri_base, query, encoded_params)
def score(evaluation,
          send_messages=False,
          notifications=False,
          dry_run=False):
    """Score every VALIDATED submission in the given evaluation queue.

    For each submission: download it, run score_submission, and mark the status
    SCORED (with the mean of the returned score values as the overall score) or
    INVALID on exception.  Statuses are stored unless dry_run is True.

    :param evaluation:    a synapse evaluation queue or its ID
    :param send_messages: if True, email each submitter their scoring result or error
    :param notifications: if True, email ADMIN_USER_IDS when scoring raises an exception
    :param dry_run:       if True, compute scores but do not store statuses
    """

    sys.stdout.write('\n\nScoring ' + utils.id_of(evaluation))
    sys.stdout.flush()

    ## collect statuses here for batch update
    ## NOTE(review): `statuses` is never appended to or stored below — each status
    ## is stored individually inside the loop; this list appears to be dead code.
    statuses = []

    for submission, status in syn.getSubmissionBundles(evaluation, status='VALIDATED'):

        ## refetch the submission so that we get the file path
        ## to be later replaced by a "downloadFiles" flag on getSubmissionBundles
        submission = syn.getSubmission(submission)

        try:
            ## NOTE(review): local `score` shadows this function's name for the
            ## remainder of the loop body (harmless here, but easy to trip over).
            score, message = score_submission(submission, submission.filePath)

            status.status = "SCORED"
            ## overall score is the arithmetic mean of all per-metric values
            status.score = math.fsum(v for k,v in score.iteritems()) / len(score)
            status.annotations = synapseclient.annotations.to_submission_status_annotations(score)

        except Exception as ex1:
            ## scoring failed: log the traceback, mark INVALID, and reuse the
            ## traceback text as the message sent back to the submitter
            sys.stderr.write('\n\nError scoring submission %s %s:\n' % (submission.name, submission.id))
            st = StringIO()
            traceback.print_exc(file=st)
            sys.stderr.write(st.getvalue())
            sys.stderr.write('\n')
            status.status = "INVALID"
            message = st.getvalue()

            if notifications and ADMIN_USER_IDS:
                submission_info = "submission id: %s\nsubmission name: %s\nsubmitted by user id: %s\n\n" % (submission.id, submission.name, submission.userId)
                response = syn.sendMessage(
                    userIds=ADMIN_USER_IDS,
                    messageSubject=CHALLENGE_NAME+": exception during scoring",
                    messageBody=error_notification_template.format(message=submission_info+st.getvalue()))
                print "sent notification: ", unicode(response).encode('utf-8')

        if not dry_run:
            status = syn.store(status)

        ## send message AFTER storing status to ensure we don't get repeat messages
        if send_messages:
            profile = syn.getUserProfile(submission.userId)

            if status.status == 'SCORED':
                message_body = scoring_message_template.format(
                    message=message,
                    username=profile.get('firstName', profile.get('userName', profile['ownerId'])),
                    submission_name=submission.name,
                    submission_id=submission.id)
                subject = "Submission to "+CHALLENGE_NAME
            else:
                message_body = scoring_error_message_template.format(
                    message=message,
                    username=profile.get('firstName', profile.get('userName', profile['ownerId'])),
                    submission_name=submission.name,
                    submission_id=submission.id)
                subject = "Error scoring submission to "+CHALLENGE_NAME

            response = syn.sendMessage(
                userIds=[submission.userId],
                messageSubject=subject,
                messageBody=message_body)
            print "sent message: ", unicode(response).encode('utf-8')

        ## progress dot per processed submission
        sys.stdout.write('.')
        sys.stdout.flush()

    sys.stdout.write('\n')
Beispiel #46
0
def syncFromSynapse(syn, entity, path=None, ifcollision='overwrite.local', allFiles=None, followLink=False):
    """Synchronizes all the files in a folder (including subfolders) from Synapse and adds a readme manifest with file
    metadata.

    :param syn:         A synapse object as obtained with syn = synapseclient.login()

    :param entity:      A Synapse ID, a Synapse Entity object of type folder or project.

    :param path:        An optional path where the file hierarchy will be reproduced. If not specified the files will
                        by default be placed in the synapseCache.

    :param ifcollision: Determines how to handle file collisions.
                        May be "overwrite.local", "keep.local", or "keep.both".
                        Defaults to "overwrite.local".

    :param followLink:  Determines whether the link returns the target Entity.
                        Defaults to False

    :returns: list of entities (files, tables, links)

    This function will crawl all subfolders of the project/folder specified by `entity` and download all files that
    have not already been downloaded.  If there are newer files in Synapse (or a local file has been edited outside of
    the cache) since the last download then the local file will be replaced by the new file unless "ifcollision" is
    changed.

    If the files are being downloaded to a specific location outside of the Synapse cache a file
    (SYNAPSE_METADATA_MANIFEST.tsv) will also be added in the path that contains the metadata (annotations, storage
    location and provenance of all downloaded files).

    See also:
    - :py:func:`synapseutils.sync.syncToSynapse`

    Example:
    Download and print the paths of all downloaded files::

        entities = syncFromSynapse(syn, "syn1234")
        for f in entities:
            print(f.path)

    """
    # Accumulator is shared across recursive calls; create it only at the top level.
    if allFiles is None:
        allFiles = list()
    # Renamed from `id` to avoid shadowing the builtin.
    entity_id = id_of(entity)
    results = syn.getChildren(entity_id)
    # getChildren yields nothing for a non-container; track that to fall back below.
    zero_results = True
    for result in results:
        zero_results = False
        if is_container(result):
            if path is not None:  # If we are downloading outside cache create directory.
                new_path = os.path.join(path, result['name'])
                try:
                    os.makedirs(new_path)
                except OSError as err:
                    # An already-existing directory is fine; anything else is not.
                    if err.errno != errno.EEXIST:
                        raise
            else:
                new_path = None
            # Bug fix: propagate followLink so links in subfolders honor the caller's flag
            # (previously the recursive call silently reset it to the default False).
            syncFromSynapse(syn, result['id'], new_path, ifcollision, allFiles, followLink=followLink)
        else:
            ent = syn.get(result['id'], downloadLocation=path, ifcollision=ifcollision, followLink=followLink)
            if isinstance(ent, File):
                allFiles.append(ent)
    if zero_results:
        # a http error would be raised if the synapse Id was not valid (404) or no permission (403) so at this point
        # the entity should be get-able
        # Bug fix: use sys.stderr (the bare name `stderr` is not in scope elsewhere in this
        # file; sibling functions consistently use sys.stderr/sys.stdout).
        sys.stderr.write("The synapse id %s is not a container (Project/Folder), attempting to get the entity anyways\n" % entity_id)
        ent = syn.get(entity_id, downloadLocation=path, ifcollision=ifcollision, followLink=followLink)
        if isinstance(ent, File):
            allFiles.append(ent)
        else:
            # Message grammar fixed ("is was neither" -> "was neither").
            raise ValueError("The provided id: %s was neither a container nor a File" % entity_id)

    if path is not None:  # If path is None files are stored in cache.
        filename = os.path.join(path, MANIFEST_FILENAME)
        filename = os.path.expanduser(os.path.normcase(filename))
        generateManifest(syn, allFiles, filename)

    return allFiles
def create_challenge_object(project, participants_team):
    """Register a Challenge linking the given project to its participants team."""
    challenge_body = json.dumps({
        'participantTeamId': utils.id_of(participants_team),
        'projectId': utils.id_of(project),
    })
    response = syn.restPOST("/challenge", body=challenge_body)
    return DictObject(**response)
def archive(evaluation, destination=None, token=None, name=None, query=None):
    """
    Archive the submissions for the given evaluation queue and store them in the destination synapse folder.

    :param evaluation: a synapse evaluation queue or its ID
    :param destination: a synapse folder or its ID
    :param query: a query that will return the desired submissions. At least the ID must be returned.
                  defaults to _select * from evaluation_[EVAL_ID] where status=="SCORED"_.
    """
    challenge = {'5877348':'FusionDetection','5952651':'IsoformQuantification'}
    if not query:
        query = 'select * from evaluation_%s where status=="SCORED"' % utils.id_of(evaluation)
    path = challenge[utils.id_of(evaluation)]
    ## for each submission, download it's associated file and write a line of metadata
    results = Query(query=query)
    if 'objectId' not in results.headers:
        raise ValueError("Can't find the required field \"objectId\" in the results of the query: \"{0}\"".format(query))
    for result in results:
        #Check if the folder has already been created in synapse 
        #(This is used as a tool to check submissions that have already been cached)
        new_map = []
        mapping = syn.get("syn7348150")
        submissionId = result[results.headers.index('objectId')]
        check = syn.query('select id,name from folder where parentId == "%s" and name == "%s"' % (destination,submissionId))
        if check['totalNumberOfResults']==0:
            os.mkdir(submissionId)
            submission = syn.getSubmission(submissionId, downloadLocation=submissionId)
            if submission.entity.externalURL is None:
                newFilePath = submission.filePath.replace(' ', '_')
                shutil.move(submission.filePath,newFilePath)
                #Store CWL file in bucket
                os.system('gsutil cp -R %s gs://smc-rna-cache/%s' % (submissionId,path))
                with open(newFilePath,"r") as cwlfile:
                    docs = yaml.load(cwlfile)
                    merged = docs['$graph']
                    docker = []
                    for tools in merged:
                        if tools['class'] == 'CommandLineTool':
                            if tools.get('requirements',None) is not None:
                                for i in tools['requirements']:
                                    if i.get('dockerPull',None) is not None:
                                        docker.append(i['dockerPull'])
                            if tools.get('hints', None) is not None:
                                for i in tools['hints']:
                                    if i.get('dockerPull',None) is not None:
                                        docker.append(i['dockerPull']) 
                        if tools['class'] == 'Workflow':
                            hints = tools.get("hints",None)
                            if hints is not None:
                                for i in tools['hints']:
                                    if os.path.basename(i['class']) == "synData":
                                        temp = syn.get(i['entity'])
                                        #create synid and index mapping
                                        new_map.append([temp.id,"gs://smc-rna-cache/%s/%s/%s" %(path,submissionId,temp.name)])
                                        #Store index files
                                        os.system('gsutil cp %s gs://smc-rna-cache/%s/%s' % (temp.path,path,submissionId))
                os.system('rm -rf ~/.synapseCache/*')
            else:
                os.system('rm %s' % os.path.join(submissionId, submission.name))
                test = subprocess.check_call(["python", os.path.join(os.path.dirname(__file__),"../../SMC-RNA-Eval/sbg-download.py"), "--token", token, submission.name, submissionId])
                os.system('gsutil cp -R %s gs://smc-rna-cache/%s' % (submissionId,path))
                #Pull down docker containers
                with open("%s/submission.cwl" % submissionId,"r") as cwlfile:
                    docs = yaml.load(cwlfile)
                    merged = docs['steps']
                    docker = []
                    for tools in merged:
                        for hint in tools['run']['hints']:
                            if hint['class'] == 'DockerRequirement':
                                docker.append(hint['dockerPull'])
                        for require in tools['run']['requirements']:
                            if require.get('requirements') is not None:
                                for i in require.get('requirements'):
                                    if i['class'] == 'DockerRequirement':
                                        docker.append(i['dockerPull'])
            os.system('rm -rf %s' % submissionId)
            if len(new_map) > 0:
                table = syn.store(Table(mapping, new_map))
            #Pull, save, and store docker containers
            docker = set(docker)
            for i in docker:
                fileName = os.path.basename(i).replace(":","_")
                os.system('sudo -i docker pull %s' % i)
                #os.system('sudo -i docker save %s' % i)
                os.system('sudo docker save -o %s.tar %s' %(fileName,i))
                os.system('sudo chmod a+r %s.tar' % fileName)
                os.system('gsutil cp %s.tar gs://smc-rna-cache/%s/%s' % (fileName,path,submissionId))
                os.remove("%s.tar" % fileName)
            submission_parent = syn.store(Folder(submissionId,parent=destination))