Exemple #1
0
def test_syncToSynapse():
    """Round-trip a manifest through syncToSynapse and syncFromSynapse.

    Uploads the files listed in a generated manifest, downloads the project
    into a temporary directory, and checks that the parents, annotation
    columns and provenance recorded in the downloaded manifest agree with
    the uploaded one.
    """

    def _load_manifest(manifest_path):
        # Index each manifest by file basename so rows can be lined up.
        frame = pd.read_csv(manifest_path, sep='\t')
        frame.index = [os.path.basename(p) for p in frame.path]
        return frame

    def _provenance_ids(cell):
        # Convert local file paths into synId.versionNumber strings
        resolved = syn._convertProvenanceList(cell.split(';'))
        return {
            '%s.%s' % (item.id, item.versionNumber)
            if isinstance(item, Entity) else item
            for item in resolved
        }

    # Upload an accurate manifest
    manifest = _makeManifest(header + row1 + row2 + row3)
    synapseutils.syncToSynapse(syn, manifest, sendMessages=False, retries=2)

    # syn.getChildren() used by syncFromSynapse() may intermittently have timing issues
    time.sleep(3)

    # Pull the project back down with syncFromSynapse
    download_dir = tempfile.mkdtemp()
    schedule_for_cleanup(download_dir)
    synapseutils.syncFromSynapse(syn, project, path=download_dir)

    orig_df = _load_manifest(manifest)
    new_df = _load_manifest(
        os.path.join(download_dir, synapseutils.sync.MANIFEST_FILENAME))

    assert_equals(len(orig_df), len(new_df))
    # Put the downloaded rows in the same order as the uploaded ones
    new_df = new_df.loc[orig_df.index]

    # Every file must have landed under the parent it was uploaded to
    assert_true(new_df.parent.equals(orig_df.parent),
                'Downloaded files not stored in same location')

    # Whatever is left after dropping the non-annotation fields is annotations
    non_annotation_cols = (synapseutils.sync.REQUIRED_FIELDS
                           + synapseutils.sync.FILE_CONSTRUCTOR_FIELDS
                           + synapseutils.sync.STORE_FUNCTION_FIELDS)
    orig_anots = orig_df.drop(non_annotation_cols, axis=1, errors='ignore')
    new_anots = new_df.drop(non_annotation_cols, axis=1, errors='ignore')
    # Same number of annotation columns on both sides
    assert_equals(orig_anots.shape[1], new_anots.shape[1])
    assert_true(new_anots.equals(orig_anots.loc[:, new_anots.columns]),
                'Annotations different')

    # Provenance is compared row by row, only where both sides have a value
    for provenance_type in ('executed', 'used'):
        row_pairs = zip(orig_df[provenance_type], new_df[provenance_type])
        for orig_cell, new_cell in row_pairs:
            if pd.isnull(orig_cell) or pd.isnull(new_cell):
                continue
            assert_equals(_provenance_ids(orig_cell),
                          _provenance_ids(new_cell))
Exemple #2
0
    def checkin(self, checkout_path=None, sync=False, force=False):
        """
        Checks in an Entity.

        Optionally uploads the files listed in the checkout manifest, then
        removes every key in ``self.ALL_ANNO`` from the entity's annotations
        and stores the entity. Progress and failure reasons are printed;
        nothing is returned.

        :param checkout_path: Directory expected to contain
            ``SYNAPSE_METADATA_MANIFEST.tsv``; only used when ``sync`` is
            true. Defaults to the current working directory at call time.
        :param sync: When true, upload the manifest's folders/files with
            ``synapseutils.syncToSynapse`` before checking in.
        :param force: When true, check in even if the entity is not checked
            out or is checked out by a different user. Refused unless
            ``self._is_admin_on_project()`` is true.
        """
        # Resolve the default here: an ``os.getcwd()`` default in the
        # signature is evaluated only once, when the method is defined.
        if checkout_path is None:
            checkout_path = os.getcwd()

        print('Checking in...')
        if not self._load():
            return

        if force and not self._is_admin_on_project():
            print('Must have administrator privileges to force check-in.')
            return

        if not self._is_checked_out(self._entity):
            if not force:
                print('Entity not checked out. Aborting.')
                return
            # Ownership is not inspected on this path.
            # NOTE(review): presumably the checked-out-by annotations are
            # absent when the entity is not checked out - confirm.
            print('WARNING: Entity not checked out.')
        elif self._entity[self.ANNO_CHECKED_OUT_BY_ID][0] != self._user.ownerId:
            # Checked out by someone else: only a forced check-in may go on.
            checked_out_by_name = self._entity[self.ANNO_CHECKED_OUT_BY_NAME][0]
            if not force:
                print('Entity can only be checked in by {0}. Aborting.'.format(
                    checked_out_by_name))
                return
            print('WARNING: Entity is currently checked out by {0}.'.format(
                checked_out_by_name))

        # Upload the files
        if sync:
            print('Syncing Folders and Files...')
            manifest_filename = os.path.join(checkout_path,
                                             'SYNAPSE_METADATA_MANIFEST.tsv')
            if os.path.exists(manifest_filename):
                synapseutils.syncToSynapse(self._synapse_client,
                                           manifest_filename,
                                           sendMessages=False)
            else:
                print(
                    'Manifest file not found in: "{0}". Folder/Files will not be uploaded to Synapse.'
                    .format(checkout_path))

        # Drop the annotations listed in ALL_ANNO before storing
        for key in self.ALL_ANNO:
            if key in self._entity.annotations:
                del self._entity.annotations[key]

        self._synapse_client.store(self._entity)

        print('Check-in was successful')
def test_syncToSynapse():
    """Upload a valid manifest, sync the project back down, and compare.

    The uploaded manifest and the manifest written by syncFromSynapse are
    both indexed by file basename; parents, annotation columns and
    provenance are then checked against each other.
    """
    # Test upload of accurate manifest
    manifest = _makeManifest(header+row1+row2+row3)
    synapseutils.syncToSynapse(syn, manifest, sendMessages=False, retries=2)

    # syn.getChildren() used by syncFromSynapse() may intermittently have timing issues
    time.sleep(3)

    # Download using syncFromSynapse
    tmpdir = tempfile.mkdtemp()
    schedule_for_cleanup(tmpdir)
    synapseutils.syncFromSynapse(syn, project, path=tmpdir)

    # Load the uploaded manifest first, then the downloaded one
    downloaded_manifest = os.path.join(tmpdir, synapseutils.sync.MANIFEST_FILENAME)
    frames = []
    for manifest_path in (manifest, downloaded_manifest):
        frame = pd.read_csv(manifest_path, sep='\t')
        frame.index = list(map(os.path.basename, frame.path))
        frames.append(frame)
    orig_df, new_df = frames

    assert_equals(len(orig_df), len(new_df))
    new_df = new_df.loc[orig_df.index]

    # Validate what was uploaded is in right location
    same_parents = new_df.parent.equals(orig_df.parent)
    assert_true(same_parents, 'Downloaded files not stored in same location')

    # Validate that annotations were set
    cols = synapseutils.sync.REQUIRED_FIELDS
    cols = cols + synapseutils.sync.FILE_CONSTRUCTOR_FIELDS
    cols = cols + synapseutils.sync.STORE_FUNCTION_FIELDS
    orig_anots = orig_df.drop(cols, axis=1, errors='ignore')
    new_anots = new_df.drop(cols, axis=1, errors='ignore')
    # Verify that we have the same number of cols
    assert_equals(orig_anots.shape[1], new_anots.shape[1])
    same_annotations = new_anots.equals(orig_anots.loc[:, new_anots.columns])
    assert_true(same_annotations, 'Annotations different')

    def as_id_set(raw):
        # Convert local file paths into synId.versionNumber strings
        ids = set()
        for item in syn._convertProvenanceList(raw.split(';')):
            if isinstance(item, Entity):
                ids.add('%s.%s' % (item.id, item.versionNumber))
            else:
                ids.add(item)
        return ids

    # Validate that provenance is correct
    for provenanceType in ['executed', 'used']:
        # Go through each row; rows missing a value on either side are skipped
        for orig, new in zip(orig_df[provenanceType], new_df[provenanceType]):
            if pd.notnull(orig) and pd.notnull(new):
                expected_ids = as_id_set(orig)
                actual_ids = as_id_set(new)
                assert_equals(expected_ids, actual_ids)
Exemple #4
0
# Constant annotation columns, applied to every row of newTsv in this order.
for anno_key, anno_value in (
        ('dataSubtype', "raw"),
        ('dataType', "geneExpression"),
        ('fileFormat', "raw"),
        ('resourceType', "experimentalData"),
        ('cellType', "motor neuron"),
        ('diagnosis', "Not Applicable"),
        ('isCellLine', False),
        ('isPrimaryCell', True),  # Coming from human / animal
        ('tissue', "Not Applicable"),
        ('organ', "brain"),
        ('species', "Mouse"),
        ('sex', float('nan')),
):
    newTsv[anno_key] = anno_value

# Some specimens could come from one individual: the individual ID is the
# part of the title before the first underscore.
newTsv['specimenID'] = tsv['title']
newTsv['individualID'] = [title.split("_")[0] for title in tsv['title']]

for anno_key, anno_value in (
        ('platform', "HiSeq2500"),  # Create new GAII
        ('consortium', "PSON"),
        ('fundingAgency', "NIH-NCI"),
):
    newTsv[anno_key] = anno_value

## Output the re-annotated data, then hand the written manifest to syncToSynapse
manifest_path = "GSE89107-manifest.tsv"
newTsv.to_csv(manifest_path, sep="\t", index=False)

synapseutils.syncToSynapse(syn, manifest_path)
Exemple #5
0
def sync(args, syn):
    """Run ``synapseutils.syncToSynapse`` with the options carried on ``args``.

    Reads ``manifestFile``, ``dryRun``, ``sendMessages`` and ``retries`` from
    ``args`` and forwards them as keyword arguments. Returns nothing.
    """
    options = dict(
        manifestFile=args.manifestFile,
        dryRun=args.dryRun,
        sendMessages=args.sendMessages,
        retries=args.retries,
    )
    synapseutils.syncToSynapse(syn, **options)
def sync(args, syn):
    """Forward the manifest options found on ``args`` to ``synapseutils.syncToSynapse``.

    ``args`` must expose ``manifestFile``, ``dryRun``, ``sendMessages`` and
    ``retries``; ``syn`` is passed through as the first argument. The result
    of the call is discarded.
    """
    manifest_file = args.manifestFile
    dry_run = args.dryRun
    send_messages = args.sendMessages
    retries = args.retries
    synapseutils.syncToSynapse(
        syn,
        manifestFile=manifest_file,
        dryRun=dry_run,
        sendMessages=send_messages,
        retries=retries,
    )
Exemple #7
0
# Fill in constant annotation columns on newTsv, write it out as a manifest,
# and pass that manifest to synapseutils.syncToSynapse.
# NOTE(review): the 'fileFormat' assignment below has no right-hand side, so
# this fragment does not parse as written. The intended value is not visible
# here - TODO restore it from the original script.
newTsv['fileFormat'] = 
newTsv['resourceType'] = "experimentalData"

newTsv['cellType'] = "glial"
newTsv['diagnosis'] = "Brain Cancer"

newTsv['isCellLine'] = False
newTsv['isPrimaryCell'] = True # Coming from human / animal

newTsv['tissue'] =  "nerve tissue"
newTsv['organ'] = "brain"


newTsv['species'] = "Mouse"
# NaN is written for every row of the 'sex' column
newTsv['sex'] = float('nan')
# Some specimens could come from one individual: the individual ID is the
# part of the title before the first space.
newTsv['specimenID'] = tsv.title
newTsv['individualID'] = [spec.split(" ")[0] for spec in tsv.title]

newTsv['platform'] = "IlluminaMouseWG-6v2.0"

newTsv['consortium'] = "PSON"
newTsv['fundingAgency'] = "NIH-NCI"


## Output the re-annotated data as a tab-separated "<GSE>-manifest.tsv", without the index
newTsv.to_csv("%s-manifest.tsv" % GSE, sep="\t", index=False)


# Pass the manifest just written to syncToSynapse
synapseutils.syncToSynapse(syn, "%s-manifest.tsv" % GSE)
 def syncToSynapse(self, manifestFile, dryRun = False, sendMessages = True, retries = MAX_RETRIES):
   """Call ``synapseutils.syncToSynapse`` with this object's ``self.syn``.

   :param manifestFile: Manifest passed through to ``synapseutils.syncToSynapse``.
   :param dryRun: Forwarded unchanged as ``dryRun``.
   :param sendMessages: Forwarded unchanged as ``sendMessages``.
   :param retries: Forwarded unchanged as ``retries``. Previously the
       caller's value was ignored and ``MAX_RETRIES`` was always sent.
   :return: Whatever ``synapseutils.syncToSynapse`` returns.
   """
   return synapseutils.syncToSynapse(self.syn, manifestFile, dryRun = dryRun,
                                     sendMessages = sendMessages, retries = retries)