def test_syncToSynapse():
    """Round-trip a manifest through syncToSynapse and verify the download matches."""
    # Upload an accurate manifest
    uploaded_manifest = _makeManifest(header + row1 + row2 + row3)
    synapseutils.syncToSynapse(syn, uploaded_manifest, sendMessages=False, retries=2)

    # syn.getChildren() used by syncFromSynapse() may intermittently have timing issues
    time.sleep(3)

    # Pull everything back down into a scratch directory
    download_dir = tempfile.mkdtemp()
    schedule_for_cleanup(download_dir)
    synced_entities = synapseutils.syncFromSynapse(syn, project, path=download_dir)

    expected = pd.read_csv(uploaded_manifest, sep='\t')
    expected.index = [os.path.basename(p) for p in expected.path]
    actual = pd.read_csv(os.path.join(download_dir, synapseutils.sync.MANIFEST_FILENAME), sep='\t')
    actual.index = [os.path.basename(p) for p in actual.path]

    assert_equals(len(expected), len(actual))
    actual = actual.loc[expected.index]

    # Validate what was uploaded is in right location
    assert_true(actual.parent.equals(expected.parent), 'Downloaded files not stored in same location')

    # Annotations: drop the bookkeeping columns, compare what remains
    reserved_cols = (synapseutils.sync.REQUIRED_FIELDS + synapseutils.sync.FILE_CONSTRUCTOR_FIELDS
                     + synapseutils.sync.STORE_FUNCTION_FIELDS)
    expected_annos = expected.drop(reserved_cols, axis=1, errors='ignore')
    actual_annos = actual.drop(reserved_cols, axis=1, errors='ignore')
    # Verify that we have the same number of cols
    assert_equals(expected_annos.shape[1], actual_annos.shape[1])
    assert_true(actual_annos.equals(expected_annos.loc[:, actual_annos.columns]), 'Annotations different')

    # Provenance: compare each row as a set of synId.versionNumber strings
    for prov_field in ['executed', 'used']:
        for before, after in zip(expected[prov_field], actual[prov_field]):
            if pd.isnull(before) or pd.isnull(after):
                continue
            # Convert local file paths into synId.versionNumber strings
            before_refs = set(
                '%s.%s' % (item.id, item.versionNumber) if isinstance(item, Entity) else item
                for item in syn._convertProvenanceList(before.split(';'))
            )
            after_refs = set(
                '%s.%s' % (item.id, item.versionNumber) if isinstance(item, Entity) else item
                for item in syn._convertProvenanceList(after.split(';'))
            )
            assert_equals(before_refs, after_refs)
def checkin(self, checkout_path=None, sync=False, force=False):
    """Checks in an Entity.

    :param checkout_path: directory containing the metadata manifest to sync
        (defaults to the current working directory, resolved at call time).
    :param sync: when True, upload the manifest's Folders/Files before check-in.
    :param force: override a checkout held by another user (admins only).
    """
    # FIX: the old signature used checkout_path=os.getcwd(), which freezes the
    # working directory at import time; resolve it at call time instead.
    if checkout_path is None:
        checkout_path = os.getcwd()

    print('Checking in...')
    if not self._load():
        return

    if force and not self._is_admin_on_project():
        print('Must have administrator privileges to force check-in.')
        return

    if not self._is_checked_out(self._entity):
        if force:
            print('WARNING: Entity not checked out.')
        else:
            print('Entity not checked out. Aborting.')
            return

    checked_out_by_id = self._entity[self.ANNO_CHECKED_OUT_BY_ID][0]
    if checked_out_by_id != self._user.ownerId:
        if force:
            # FIX: this warning was unreachable before — the old guard was
            # `not force and checked_out_by_id != ...`, so the force branch
            # could never execute. Now a forced check-in over someone else's
            # checkout warns and proceeds.
            print(
                'WARNING: Entity is currently checked out by {0}.'.format(
                    self._entity[self.ANNO_CHECKED_OUT_BY_NAME][0]))
        else:
            print('Entity can only be checked in by {0}. Aborting.'.format(
                self._entity[self.ANNO_CHECKED_OUT_BY_NAME][0]))
            return

    # Upload the files
    if sync:
        print('Syncing Folders and Files...')
        manifest_filename = os.path.join(checkout_path, 'SYNAPSE_METADATA_MANIFEST.tsv')
        if os.path.exists(manifest_filename):
            synapseutils.syncToSynapse(self._synapse_client, manifest_filename, sendMessages=False)
        else:
            print(
                'Manifest file not found in: "{0}". Folder/Files will not be uploaded to Synapse.'
                .format(checkout_path))

    # Remove all checkout bookkeeping annotations, then persist the entity.
    for key in self.ALL_ANNO:
        if key in self._entity.annotations:
            del self._entity.annotations[key]
    self._synapse_client.store(self._entity)
    print('Check-in was successful')
def test_syncToSynapse():
    """Upload an accurate manifest, sync it back, and check files, annotations, provenance."""
    source_manifest = _makeManifest(header + row1 + row2 + row3)
    synapseutils.syncToSynapse(syn, source_manifest, sendMessages=False, retries=2)

    # syn.getChildren() used by syncFromSynapse() may intermittently have timing issues
    time.sleep(3)

    # Download using syncFromSynapse
    scratch_dir = tempfile.mkdtemp()
    schedule_for_cleanup(scratch_dir)
    downloaded = synapseutils.syncFromSynapse(syn, project, path=scratch_dir)

    df_before = pd.read_csv(source_manifest, sep='\t')
    df_before.index = [os.path.basename(p) for p in df_before.path]
    df_after = pd.read_csv(os.path.join(scratch_dir, synapseutils.sync.MANIFEST_FILENAME), sep='\t')
    df_after.index = [os.path.basename(p) for p in df_after.path]

    assert_equals(len(df_before), len(df_after))
    df_after = df_after.loc[df_before.index]

    # Validate what was uploaded is in right location
    assert_true(df_after.parent.equals(df_before.parent), 'Downloaded files not stored in same location')

    # Validate that annotations were set (everything outside the reserved manifest columns)
    manifest_cols = (synapseutils.sync.REQUIRED_FIELDS
                     + synapseutils.sync.FILE_CONSTRUCTOR_FIELDS
                     + synapseutils.sync.STORE_FUNCTION_FIELDS)
    annos_before = df_before.drop(manifest_cols, axis=1, errors='ignore')
    annos_after = df_after.drop(manifest_cols, axis=1, errors='ignore')
    # Verify that we have the same number of cols
    assert_equals(annos_before.shape[1], annos_after.shape[1])
    assert_true(annos_after.equals(annos_before.loc[:, annos_after.columns]), 'Annotations different')

    # Validate that provenance is correct, row by row
    for prov_type in ['executed', 'used']:
        for left, right in zip(df_before[prov_type], df_after[prov_type]):
            if pd.isnull(left) or pd.isnull(right):
                continue
            # Convert local file paths into synId.versionNumber strings
            left_ids = {
                '%s.%s' % (e.id, e.versionNumber) if isinstance(e, Entity) else e
                for e in syn._convertProvenanceList(left.split(';'))
            }
            right_ids = {
                '%s.%s' % (e.id, e.versionNumber) if isinstance(e, Entity) else e
                for e in syn._convertProvenanceList(right.split(';'))
            }
            assert_equals(left_ids, right_ids)
# Annotation values that are constant across every row of this manifest.
constant_annotations = {
    'dataSubtype': "raw",
    'dataType': "geneExpression",
    'fileFormat': "raw",
    'resourceType': "experimentalData",
    'cellType': "motor neuron",
    'diagnosis': "Not Applicable",
    'isCellLine': False,
    'isPrimaryCell': True,  # Coming from human / animal
    'tissue': "Not Applicable",
    'organ': "brain",
    'species': "Mouse",
    'sex': float('nan'),  # Some specimens could come from one individual
}
for column, value in constant_annotations.items():
    newTsv[column] = value

# Per-row identifiers derived from the sample titles
newTsv['specimenID'] = tsv['title']
newTsv['individualID'] = [title.split("_")[0] for title in tsv['title']]

newTsv['platform'] = "HiSeq2500"  # Create new GAII
newTsv['consortium'] = "PSON"
newTsv['fundingAgency'] = "NIH-NCI"

# Output the re-annotated data and push it to Synapse
newTsv.to_csv("GSE89107-manifest.tsv", sep="\t", index=False)
synapseutils.syncToSynapse(syn, "GSE89107-manifest.tsv")
def sync(args, syn):
    """CLI handler: forward the parsed manifest-sync arguments to synapseutils."""
    synapseutils.syncToSynapse(
        syn,
        manifestFile=args.manifestFile,
        dryRun=args.dryRun,
        sendMessages=args.sendMessages,
        retries=args.retries,
    )
def sync(args, syn):
    """Entry point for the `sync` subcommand: bulk-upload a manifest of files."""
    # Unpack the argparse namespace before delegating, for readability.
    manifest = args.manifestFile
    dry_run = args.dryRun
    send_messages = args.sendMessages
    retry_count = args.retries
    synapseutils.syncToSynapse(syn, manifestFile=manifest, dryRun=dry_run,
                               sendMessages=send_messages, retries=retry_count)
# NOTE(review): the original chained assignment set BOTH fileFormat and
# resourceType to "experimentalData". The sibling script for GSE89107 gives
# fileFormat its own value, so fileFormat here may have lost its intended
# value — confirm with the data curator. Behavior preserved as-is.
shared_annotations = {
    'fileFormat': "experimentalData",
    'resourceType': "experimentalData",
    'cellType': "glial",
    'diagnosis': "Brain Cancer",
    'isCellLine': False,
    'isPrimaryCell': True,  # Coming from human / animal
    'tissue': "nerve tissue",
    'organ': "brain",
    'species': "Mouse",
    'sex': float('nan'),  # Some specimens could come from one individual
}
for column, value in shared_annotations.items():
    newTsv[column] = value

# Per-row identifiers derived from the sample titles
newTsv['specimenID'] = tsv.title
newTsv['individualID'] = [title.split(" ")[0] for title in tsv.title]

newTsv['platform'] = "IlluminaMouseWG-6v2.0"
newTsv['consortium'] = "PSON"
newTsv['fundingAgency'] = "NIH-NCI"

# Output the re-annotated data and push it to Synapse
newTsv.to_csv("%s-manifest.tsv" % GSE, sep="\t", index=False)
synapseutils.syncToSynapse(syn, "%s-manifest.tsv" % GSE)
def syncToSynapse(self, manifestFile, dryRun=False, sendMessages=True, retries=MAX_RETRIES):
    """Bulk-upload the files listed in a manifest via synapseutils.syncToSynapse.

    :param manifestFile: path to the TSV manifest describing files to upload.
    :param dryRun: when True, validate the manifest without uploading.
    :param sendMessages: when True, email the user on completion/failure.
    :param retries: number of upload retries.
    :returns: whatever synapseutils.syncToSynapse returns.
    """
    # FIX: the old body passed retries=MAX_RETRIES, silently ignoring the
    # caller-supplied `retries` argument; forward the parameter instead.
    return synapseutils.syncToSynapse(self.syn, manifestFile, dryRun, sendMessages,
                                      retries=retries)