Example #1
def interaction_func(self, submission, admin):
        """Archives a Project submission.

        Args:
            submission: Submission object
            admin: Synapse user ID or team that the archive will be
                   shared with
        Returns:
            archive status dict
        """

        project_entity = Project('Archived {} {} {} {}'.format(
            submission.name.replace("&", "+").replace("'", ""),
            int(round(time.time() * 1000)),
            submission.id,
            submission.entityId))
        new_project_entity = self.syn.store(project_entity)
        permissions.set_entity_permissions(self.syn, new_project_entity,
                                           admin, "admin")

        synapseutils.copy(self.syn, submission.entityId,
                          new_project_entity.id)
        archived = {"archived": new_project_entity.id}

        archive_status = {'valid': True,
                          'annotations': archived,
                          'message': "Archived!"}
        return archive_status
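
For orientation: synapseutils.copy returns a dict mapping each source entity ID to the ID of its copy, which is what the "archived" annotation above records. A minimal standalone sketch of the same pattern, where the login and the submission ID are placeholder assumptions:

import time

import synapseclient
import synapseutils
from synapseclient import Project

syn = synapseclient.login()  # assumes cached credentials
submission = syn.getSubmission(9700000)  # hypothetical submission ID
archive = syn.store(Project('Archived {} {} {} {}'.format(
    submission.name.replace("&", "+").replace("'", ""),
    int(round(time.time() * 1000)), submission.id, submission.entityId)))
# copy() returns {source_id: copied_id}
mapping = synapseutils.copy(syn, submission.entityId, archive.id)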
Example #2
def archive_writeup(syn, evaluation, stat="VALIDATED", reArchive=False):
    """
    Archive the submissions for the given evaluation queue and
    store them in the destination synapse folder.

    :param evaluation: a synapse evaluation queue or its ID
    :param query: a query that will return the desired submissions.
                  At least the ID must be returned. Defaults to:
                  'select * from evaluation_[EVAL_ID] where status=="SCORED"'
    """
    if type(evaluation) != synapseclient.Evaluation:
        evaluation = syn.getEvaluation(evaluation)

    print("\n\nArchiving", evaluation.id, evaluation.name)
    print("-" * 60)

    for sub, status in syn.getSubmissionBundles(evaluation, status=stat):
        # retrieve file into cache and copy it to destination
        checkIfArchived = filter(lambda x: x.get("key") == "archived",
                                 status.annotations['stringAnnos'])
        if len(list(checkIfArchived)) == 0 or reArchive:
            projectEntity = synapseclient.Project(
                'Archived {} {} {} {}'.format(
                    sub.name.replace("&", "+").replace("'", ""),
                    int(round(time.time() * 1000)), sub.id, sub.entityId))
            entity = syn.store(projectEntity)
            adminPriv = [
                'DELETE', 'DOWNLOAD', 'CREATE', 'READ', 'CHANGE_PERMISSIONS',
                'UPDATE', 'MODERATE', 'CHANGE_SETTINGS'
            ]
            syn.setPermissions(entity, "3324230", adminPriv)
            synapseutils.copy(syn, sub.entityId, entity.id)
            archived = {"archived": entity.id}
            status = utils.update_single_submission_status(status, archived)
            syn.store(status)
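
A hedged usage sketch of the function above; the evaluation queue ID is a placeholder, and syn is assumed to be an authenticated synapseclient.Synapse instance:

import synapseclient

syn = synapseclient.login()
# Re-archive every SCORED submission in a hypothetical evaluation queue
archive_writeup(syn, 9614543, stat="SCORED", reArchive=True)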
Example #3
def copy(args, syn):
    mappings = synapseutils.copy(syn, args.id, args.destinationId,
                                 copyWikiPage=args.skipCopyWiki,
                                 excludeTypes=args.excludeTypes,
                                 version=args.version,
                                 updateExisting=args.updateExisting,
                                 setProvenance=args.setProvenance)
    print(mappings)
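
One plausible way this wrapper gets wired into a command line; the argparse flags below mirror the attributes the function reads but are otherwise assumptions, not the real Synapse CLI definitions:

import argparse

import synapseclient

parser = argparse.ArgumentParser(description="Copy a Synapse entity")
parser.add_argument("id")
parser.add_argument("destinationId")
parser.add_argument("--skipCopyWiki", action="store_true")
parser.add_argument("--excludeTypes", nargs="*", default=[])
parser.add_argument("--version", default=None)
parser.add_argument("--updateExisting", action="store_true")
parser.add_argument("--setProvenance", default="traceback")
args = parser.parse_args()
copy(args, synapseclient.login())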
Example #4
 def test_no_copy_types(self):
     """Docker repositories and EntityViews aren't copied"""
     access_requirements = {'results': []}
     permissions = ["DOWNLOAD"]
     with patch.object(syn, "get",
                       return_value=self.project_entity) as patch_syn_get,\
          patch.object(syn, "getPermissions",
                       return_value=permissions) as patch_syn_permissions,\
          patch.object(syn, "restGET",
                       return_value=access_requirements) as patch_restget,\
          patch.object(syn, "getChildren") as patch_get_children:
         copied_file = synapseutils.copy(
             syn,
             self.project_entity,
             destinationId=self.second_project.id,
             skipCopyWikiPage=True)
         assert_equals(copied_file,
                       {self.project_entity.id: self.second_project.id})
         calls = [
             call(self.project_entity, downloadFile=False),
             call(self.second_project.id)
         ]
         patch_syn_get.assert_has_calls(calls)
         patch_restget.assert_called_once_with(
             '/entity/{}/accessRequirement'.format(self.project_entity.id))
         patch_get_children.assert_called_once_with(
             self.project_entity,
             includeTypes=['folder', 'file', 'table', 'link'])
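
The includeTypes list asserted above is the flip side of the public excludeTypes knob; a sketch with placeholder Synapse IDs:

import synapseclient
import synapseutils

syn = synapseclient.login()
# Copy a project tree but skip files and tables; folders and links still copy
mapping = synapseutils.copy(syn, "syn123", destinationId="syn456",
                            excludeTypes=["file", "table"])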
Example #5
def archive(evaluation, stat="VALIDATED", reArchive=False):
    """
    Archive the submissions for the given evaluation queue by copying
    each one into its own new Synapse project.

    :param evaluation: a synapse evaluation queue or its ID
    :param stat: submission status to archive (defaults to "VALIDATED")
    :param reArchive: if True, re-archive submissions that already carry
                      an "archived" annotation
    """
    if not isinstance(evaluation, Evaluation):
        evaluation = syn.getEvaluation(evaluation)

    print("\n\nArchiving", evaluation.id, evaluation.name)
    print("-" * 60)
    sys.stdout.flush()

    for submission, status in syn.getSubmissionBundles(evaluation, status=stat):
        # retrieve file into cache and copy it to destination
        checkIfArchived = filter(lambda x: x.get("key") == "archived",
                                 status.annotations['stringAnnos'])
        if len(list(checkIfArchived)) == 0 or reArchive:
            projectEntity = Project('Archived %s %d %s %s' % (
                submission.name, int(round(time.time() * 1000)),
                submission.id, submission.entityId))
            entity = syn.store(projectEntity)
            adminPriv = ['DELETE', 'DOWNLOAD', 'CREATE', 'READ',
                         'CHANGE_PERMISSIONS', 'UPDATE', 'MODERATE',
                         'CHANGE_SETTINGS']
            syn.setPermissions(entity, "3324230", adminPriv)
            syn.setPermissions(entity, "3329874", adminPriv)
            syn.setPermissions(entity, "3356007", ["READ", "DOWNLOAD"])
            copied = synu.copy(syn, submission.entityId, entity.id)
            archived = {"archived": entity.id}
            status = update_single_submission_status(status, archived)
            syn.store(status)
Example #6
def buildProject(syn, projectName, teamId, adminId, templateId, projectView):
    """
    Copies a synapse project template and adds it to the csbc consortium project view

    :param syn:
    :param projectName:
    :param teamId:
    :param adminId:
    :param templateId:
    :param projectView:
    :return:
    """

    pc = createProject(syn,
                       project_name=projectName,
                       teamId=teamId,
                       adminId=adminId)
    print("project %s location on synapse is %s" % (projectName, pc.id))

    copied_syn_dict = synapseutils.copy(syn,
                                        entity=templateId,
                                        destinationId=pc.id)

    pv = updateProjectViewScope(syn, projectView, pc.id)
    print("Updated csbc project view scope - needs updated annotations\n")
Example #7
def copy(args, syn):
    mappings = synapseutils.copy(syn, args.id, args.destinationId,
                                 skipCopyWikiPage=args.skipCopyWiki,
                                 skipCopyAnnotations=args.skipCopyAnnotations,
                                 excludeTypes=args.excludeTypes,
                                 version=args.version,
                                 updateExisting=args.updateExisting,
                                 setProvenance=args.setProvenance)
    print(mappings)
Example #8
def archive(evaluation, archiveType, destination=None, name=None, query=None):
    """
    Archive the submissions for the given evaluation queue and store them
    in the destination synapse folder.

    :param evaluation: a synapse evaluation queue or its ID
    :param archiveType: "submission" bundles all submission files and their
                        metadata into a single tarball; any other value copies
                        each submission into its own new project
    :param destination: a synapse folder or its ID
    :param name: name of the tarball, defaults to submissions_[EVAL_ID].tgz
    :param query: a query that will return the desired submissions. At least
                  the ID must be returned. Defaults to
                  'select * from evaluation_[EVAL_ID] where status=="SCORED"'.
    """
    tempdir = tempfile.mkdtemp()
    archive_dirname = 'submissions_%s' % utils.id_of(evaluation)

    if not query:
        query = 'select * from evaluation_%s where status=="SCORED"' % utils.id_of(evaluation)

    # for each submission, download its associated file and write a line of metadata
    results = Query(query=query)
    if 'objectId' not in results.headers:
        raise ValueError("Can't find the required field \"objectId\" in the results of the query: \"{0}\"".format(query))
    if archiveType == "submission":
        if not name:
            name = 'submissions_%s.tgz' % utils.id_of(evaluation)
        tar_path = os.path.join(tempdir, name)
        print("creating tar at:", tar_path)
        print(results.headers)
        with tarfile.open(tar_path, mode='w:gz') as archive:
            with open(os.path.join(tempdir, 'submission_metadata.csv'), 'w') as f:
                f.write(','.join(hdr for hdr in (results.headers + ['filename'])) + '\n')
                for result in results:
                    # retrieve file into cache and copy it to destination
                    submission = syn.getSubmission(result[results.headers.index('objectId')])
                    prefixed_filename = submission.id + "_" + os.path.basename(submission.filePath)
                    archive.add(submission.filePath, arcname=os.path.join(archive_dirname, prefixed_filename))
                    line = ','.join(str(item) for item in (result + [prefixed_filename]))
                    print(line)
                    f.write(line + '\n')
            archive.add(
                name=os.path.join(tempdir, 'submission_metadata.csv'),
                arcname=os.path.join(archive_dirname, 'submission_metadata.csv'))

        entity = syn.store(File(tar_path, parent=destination), evaluation_id=utils.id_of(evaluation))
        print("created:", entity.id, entity.name)
        toReturn = entity.id
    else:
        toReturn = {}
        for result in results:
            # retrieve file into cache and copy it to destination
            submission = syn.getSubmission(result[results.headers.index('objectId')])
            projectEntity = Project('Archived %s %s %s %s' % (
                time.strftime("%Y%m%d"), submission.id,
                submission.entity.id, submission.entity.name))
            entity = syn.store(projectEntity)
            copied = synu.copy(syn, submission.entity.id, entity.id)
            toReturn.update(copied)
    return toReturn
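
To inspect an archive produced in "submission" mode, the metadata CSV can be read straight out of the tarball; a sketch assuming the default naming used above:

import tarfile

# hypothetical evaluation ID baked into the default tarball name
with tarfile.open("submissions_9614543.tgz", mode="r:gz") as tar:
    member = next(m for m in tar.getmembers()
                  if m.name.endswith("submission_metadata.csv"))
    print(tar.extractfile(member).read().decode("utf-8"))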
Example #9
 def test_dont_copy_read_permissions(self):
     """Entities with READ permissions not copied"""
     permissions = {'canDownload': False}
     with patch.object(syn, "get",
                       return_value=self.file_ent) as patch_syn_get,\
          patch.object(syn, "restGET",
                       return_value=permissions) as patch_rest_get:
         copied_file = synapseutils.copy(
             syn,
             self.file_ent,
             destinationId=self.second_project.id,
             skipCopyWikiPage=True)
         assert_equal(copied_file, dict())
         patch_syn_get.assert_called_once_with(self.file_ent,
                                               downloadFile=False)
         rest_call = "/entity/{}/permissions".format(self.file_ent.id)
         patch_rest_get.assert_called_once_with(rest_call)
Example #10
 def test_dont_copy_read_permissions(self):
     """Entities with READ permissions not copied"""
     permissions = ["READ"]
     with patch.object(syn, "get",
                      return_value=self.file_ent) as patch_syn_get,\
          patch.object(syn, "getPermissions",
                       return_value=permissions) as patch_syn_permissions:
         copied_file = synapseutils.copy(
             syn,
             self.file_ent,
             destinationId=self.second_project.id,
             skipCopyWikiPage=True)
         assert_equals(copied_file, dict())
         patch_syn_get.assert_called_once_with(self.file_ent,
                                               downloadFile=False)
         patch_syn_permissions.assert_called_once_with(
             self.file_ent, syn.username)
Example #11
def archive_project(syn, submission, admin):
    """
    Make a copy (archive) of the Project submission.

    Args:
        submission - submission ID
        admin - user or team that will be granted admin rights on the
                archived project
    """
    writeup = syn.getSubmission(submission)
    name = writeup.entity.name.replace("&", "+").replace("'", "")
    curr_time = int(round(time.time() * 1000))
    new_project = Project(f"Archived {name} {curr_time} {writeup.id} " +
                          f"{writeup.entityId}")
    archive = syn.store(new_project)
    permissions.set_entity_permissions(syn, archive, admin, "admin")
    archived = synapseutils.copy(syn, writeup.entityId, archive.id)
    return {"archived": archived.get(writeup.entityId)}
Example #12
 def test_copy_entity_access_requirements(self):
     # TEST: Entity with access requirement not copied
     access_requirements = {'results': ["fee", "fi"]}
     permissions = ["DOWNLOAD"]
     with patch.object(syn, "get",
                       return_value=self.file_ent) as patch_syn_get,\
          patch.object(syn, "getPermissions",
                       return_value=permissions) as patch_syn_permissions,\
          patch.object(syn, "restGET",
                       return_value=access_requirements) as patch_restget:
         copied_file = synapseutils.copy(
             syn,
             self.file_ent,
             destinationId=self.second_project.id,
             skipCopyWikiPage=True)
         assert_equals(copied_file, dict())
         patch_syn_get.assert_called_once_with(self.file_ent,
                                               downloadFile=False)
         patch_restget.assert_called_once_with(
             '/entity/{}/accessRequirement'.format(self.file_ent.id))
Example #13
def create_team_wikis(syn, synid, templateid, tracker_table_synid):
    """
    Function that creates wiki pages from a template by looking at teams that
    are registered for a challenge.  The teams that have a wiki made for them
    Are stored into a trackerTable that has columns wikiSynId, and teamId

    Args:
        synId: Synapse id of challenge project
        templateId:  Synapse id of the template
        trackerTableSynId: Synapse id of Table that tracks if wiki pages
                           have been made per team
    """

    challenge_ent = syn.get(synid)
    challenge_obj = utils.get_challenge(challenge_ent)
    registered_teams = syn._GET_paginated("/challenge/{}/challengeTeam".format(
        challenge_obj['id']))
    for i in registered_teams:
        submitted_teams = syn.tableQuery(
            "SELECT * FROM {} where teamId = '{}'".format(
                tracker_table_synid, i['teamId']))
        if len(submitted_teams.asDataFrame()) == 0:
            team = syn.getTeam(i['teamId'])
            # The project name is the challenge project name and team name
            project = syn.store(
                synapseclient.Project("{} {}".format(challenge_ent.name,
                                                     team.name)))
            # Give admin access to the team
            syn.setPermissions(project,
                               i['teamId'],
                               accessType=[
                                   'DELETE', 'CHANGE_SETTINGS', 'MODERATE',
                                   'CREATE', 'READ', 'DOWNLOAD', 'UPDATE',
                                   'CHANGE_PERMISSIONS'
                               ])
            wiki_copy = synapseutils.copy(syn, templateid, project.id)
            # syn.sendMessage(i[])
            # Store copied synId to tracking table
            tracking_table = synapseclient.Table(
                tracker_table_synid, [[wiki_copy[templateid], i['teamId']]])
            syn.store(tracking_table)
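
A usage sketch; all three Synapse IDs are placeholders:

import synapseclient

syn = synapseclient.login()
create_team_wikis(syn,
                  synid="syn1111111",                # challenge project
                  templateid="syn2222222",           # wiki template project
                  tracker_table_synid="syn3333333")  # tracker table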
Example #14
 def test_copy_entity_access_requirements(self):
     # TEST: Entity with access requirement not copied
     access_requirements = {'results': ["fee", "fi"]}
     permissions = {'canDownload': True}
     with patch.object(syn, "get",
                       return_value=self.file_ent) as patch_syn_get,\
          patch.object(syn, "restGET",
                       side_effects=[permissions,
                                     access_requirements]) as patch_rest_get:
         copied_file = synapseutils.copy(
             syn,
             self.file_ent,
             destinationId=self.second_project.id,
             skipCopyWikiPage=True)
         assert_equal(copied_file, dict())
         patch_syn_get.assert_called_once_with(self.file_ent,
                                               downloadFile=False)
          calls = [
              call('/entity/{}/accessRequirement'.format(self.file_ent.id)),
              call("/entity/{}/permissions".format(self.file_ent.id))
          ]
          patch_rest_get.assert_has_calls(calls, any_order=True)
Example #15
def consortiumToPublic(syn, processingDate, genie_version, releaseId, databaseSynIdMappingDf, publicReleaseCutOff=365, staging=False):

    ANONYMIZE_CENTER = syn.tableQuery('SELECT * FROM syn10170510')
    ANONYMIZE_CENTER_DF = ANONYMIZE_CENTER.asDataFrame()
    CNA_PATH = os.path.join(db_to_staging.GENIE_RELEASE_DIR,"data_CNA_%s.txt" % genie_version)
    CLINICAL_PATH = os.path.join(db_to_staging.GENIE_RELEASE_DIR,'data_clinical_%s.txt' % genie_version)
    CLINICAL_SAMPLE_PATH = os.path.join(db_to_staging.GENIE_RELEASE_DIR,'data_clinical_sample_%s.txt' % genie_version)
    CLINICAL_PATIENT_PATH = os.path.join(db_to_staging.GENIE_RELEASE_DIR,'data_clinical_patient_%s.txt' % genie_version)
    DATA_GENE_PANEL_PATH = os.path.join(db_to_staging.GENIE_RELEASE_DIR,'data_gene_matrix_%s.txt' % genie_version)
    MUTATIONS_PATH = os.path.join(db_to_staging.GENIE_RELEASE_DIR,'data_mutations_extended_%s.txt' % genie_version)
    FUSIONS_PATH = os.path.join(db_to_staging.GENIE_RELEASE_DIR,'data_fusions_%s.txt' % genie_version)
    SEG_PATH = os.path.join(db_to_staging.GENIE_RELEASE_DIR,'genie_public_data_cna_hg19_%s.seg' % genie_version)
    COMBINED_BED_PATH = os.path.join(db_to_staging.GENIE_RELEASE_DIR,'genie_combined_%s.bed' % genie_version)

    if not os.path.exists(db_to_staging.GENIE_RELEASE_DIR):
        os.mkdir(db_to_staging.GENIE_RELEASE_DIR)
    if not os.path.exists(db_to_staging.CASE_LIST_PATH):
        os.mkdir(db_to_staging.CASE_LIST_PATH)

    # if staging:
    #   #public release staging
    #   PUBLIC_RELEASE_PREVIEW = "syn7871696"
    #   PUBLIC_RELEASE_PREVIEW_CASELIST = "syn9689659"
    # else:
    #public release preview
    PUBLIC_RELEASE_PREVIEW =  databaseSynIdMappingDf['Id'][databaseSynIdMappingDf['Database'] == 'public'].values[0]
    PUBLIC_RELEASE_PREVIEW_CASELIST = db_to_staging.find_caselistid(syn, PUBLIC_RELEASE_PREVIEW)

    ##############################################################################################################################
    ## Sponsored projects filter
    ##############################################################################################################################
    ## if before release date -> go into staging consortium
    ## if after date -> go into public 
    # sponsoredReleaseDate = syn.tableQuery('SELECT * FROM syn8545108')
    # sponsoredReleaseDateDf = sponsoredReleaseDate.asDataFrame()
    # sponsoredProjectSamples = syn.tableQuery('SELECT * FROM syn8545106')
    # sponsoredProjectSamplesDf = sponsoredProjectSamples.asDataFrame()
    # sponsoredProjectsDf = sponsoredProjectSamplesDf.merge(sponsoredReleaseDateDf, left_on="sponsoredProject", right_on="sponsoredProjects")
    # dates = sponsoredProjectsDf['releaseDate'].apply(lambda date: datetime.datetime.strptime(date, '%b-%Y'))
    # publicReleaseSamples = sponsoredProjectsDf['genieSampleId'][dates < processingDate]
    ##############################################################################################################################
    
    # SEQ_DATE filter
    # Jun-2015, given processing date (today) -> public release (processing date - Jun-2015 > 12 months)
    consortiumReleaseWalk = synapseutils.walk(syn, releaseId)

    consortiumRelease = next(consortiumReleaseWalk)
    clinical = [syn.get(synid, followLink=True) for filename, synid in consortiumRelease[2] if filename == "data_clinical.txt"][0]
    gene_matrix = [syn.get(synid, followLink=True) for filename, synid in consortiumRelease[2] if filename == "data_gene_matrix.txt"][0]

    clinicalDf = pd.read_csv(clinical.path, sep="\t", comment="#")
    gene_matrixdf = pd.read_csv(gene_matrix.path, sep="\t")

    removeForPublicSamples = process_functions.seqDateFilter(clinicalDf,processingDate,publicReleaseCutOff)
    #comment back in when public release filter back on
    #publicReleaseSamples = publicReleaseSamples.append(keepForPublicSamples)
    #Make sure all null oncotree codes are removed
    clinicalDf = clinicalDf[~clinicalDf['ONCOTREE_CODE'].isnull()]
    publicReleaseSamples = clinicalDf.SAMPLE_ID[~clinicalDf.SAMPLE_ID.isin(removeForPublicSamples)]

    logger.info("SEQ_DATES for public release: " + ", ".join(set(clinicalDf.SEQ_DATE[clinicalDf.SAMPLE_ID.isin(publicReleaseSamples)].astype(str))))

    #Clinical release scope filter
    #If consortium -> Don't release to public
    clinicalReleaseScope = syn.tableQuery("SELECT * FROM syn8545211 where releaseScope = 'public'")
    publicRelease = clinicalReleaseScope.asDataFrame()

    allClin = clinicalDf[clinicalDf['SAMPLE_ID'].isin(publicReleaseSamples)]
    allClin.to_csv(CLINICAL_PATH, sep="\t", index=False)

    gene_matrixdf = gene_matrixdf[gene_matrixdf['SAMPLE_ID'].isin(publicReleaseSamples)]
    gene_matrixdf.to_csv(DATA_GENE_PANEL_PATH,sep="\t",index=False)
    storeFile(syn, DATA_GENE_PANEL_PATH, PUBLIC_RELEASE_PREVIEW, ANONYMIZE_CENTER_DF, genie_version, name="data_gene_matrix.txt")
    storeFile(syn, CLINICAL_PATH, PUBLIC_RELEASE_PREVIEW, ANONYMIZE_CENTER_DF, genie_version, name="data_clinical.txt")
    
    create_case_lists.main(CLINICAL_PATH, DATA_GENE_PANEL_PATH, db_to_staging.CASE_LIST_PATH, "genie_public")

    caseListFiles = os.listdir(db_to_staging.CASE_LIST_PATH)
    caseListEntities = []
    for casePath in caseListFiles:
        casePath = os.path.join(db_to_staging.CASE_LIST_PATH, casePath)
        caseListEntities.append(storeFile(syn, casePath, PUBLIC_RELEASE_PREVIEW_CASELIST, ANONYMIZE_CENTER_DF, genie_version))

    #Grab mapping table to fill in clinical headers
    mapping_table = syn.tableQuery('SELECT * FROM syn9621600')
    mapping = mapping_table.asDataFrame()
    genePanelEntities = []
    for entName, entId in consortiumRelease[2]:
        if "data_linear" in entName or "meta_" in entName:
            continue
        elif entName == "data_clinical.txt":
            patientCols = publicRelease['fieldName'][publicRelease['level'] == "patient"].tolist()
            sampleCols = ["PATIENT_ID"]
            sampleCols.extend(publicRelease['fieldName'][publicRelease['level'] == "sample"].tolist())
            # clinicalDf is defined above
            # clinicalDf['AGE_AT_SEQ_REPORT'] = [int(math.floor(int(float(i))/365.25)) if process.checkInt(i) else i for i in clinicalDf['AGE_AT_SEQ_REPORT']]
            # clinicalDf['AGE_AT_SEQ_REPORT'][clinicalDf['AGE_AT_SEQ_REPORT'] == ">32485"] = ">89"
            # clinicalDf['AGE_AT_SEQ_REPORT'][clinicalDf['AGE_AT_SEQ_REPORT'] == "<6570"] = "<18"

            clinicalDf = clinicalDf[clinicalDf['SAMPLE_ID'].isin(publicReleaseSamples)]

            #Delete columns that are private scope
            # for private in privateRelease:
            #   del clinicalDf[private]
            process_functions.addClinicalHeaders(clinicalDf, mapping, patientCols, sampleCols, CLINICAL_SAMPLE_PATH, CLINICAL_PATIENT_PATH)

            storeFile(syn, CLINICAL_SAMPLE_PATH, PUBLIC_RELEASE_PREVIEW, ANONYMIZE_CENTER_DF, genie_version, name="data_clinical_sample.txt")
            storeFile(syn, CLINICAL_PATIENT_PATH, PUBLIC_RELEASE_PREVIEW, ANONYMIZE_CENTER_DF, genie_version, name="data_clinical_patient.txt")

        elif "mutation" in entName:
            mutation = syn.get(entId, followLink=True)
            mutationDf = pd.read_csv(mutation.path, sep="\t", comment="#")
            mutationDf = commonVariantFilter(mutationDf)
            mutationDf['FILTER'] = "PASS"
            mutationDf = mutationDf[mutationDf['Tumor_Sample_Barcode'].isin(publicReleaseSamples)]
            text = process_functions.removeFloat(mutationDf)
            with open(MUTATIONS_PATH, 'w') as f:
                f.write(text)
            storeFile(syn, MUTATIONS_PATH, PUBLIC_RELEASE_PREVIEW, ANONYMIZE_CENTER_DF, genie_version, name="data_mutations_extended.txt")

        elif "fusion" in entName:
            fusion = syn.get(entId, followLink=True)
            fusionDf = pd.read_csv(fusion.path, sep="\t")
            #remove = ["Entrez_Gene_Id","Method"]
            #fusionDf = fusionDf[fusionDf.columns[~fusionDf.columns.isin(remove)]]
            fusionDf = fusionDf[fusionDf['Tumor_Sample_Barcode'].isin(publicReleaseSamples)]
            fusionDf.to_csv(FUSIONS_PATH,sep="\t",index=False)
            storeFile(syn, FUSIONS_PATH, PUBLIC_RELEASE_PREVIEW, ANONYMIZE_CENTER_DF, genie_version, name="data_fusions.txt")
        elif "CNA" in entName:
            cna = syn.get(entId, followLink=True)
            cnaDf = pd.read_csv(cna.path, sep="\t")
            cnaDf = cnaDf[cnaDf.columns[cnaDf.columns.isin(publicReleaseSamples.append(pd.Series("Hugo_Symbol")))]]
            text = process_functions.removeFloat(cnaDf)
            text = text.replace("\t\t","\tNA\t").replace("\t\t","\tNA\t").replace('\t\n',"\tNA\n")
            with open(CNA_PATH, "w") as cnaFile:
                cnaFile.write(text)
            storeFile(syn, CNA_PATH, PUBLIC_RELEASE_PREVIEW, ANONYMIZE_CENTER_DF, genie_version, name="data_CNA.txt")
        elif entName.endswith(".seg"):
            seg = syn.get(entId, followLink=True)
            segDf = pd.read_csv(seg.path, sep="\t")
            segDf = segDf[segDf['ID'].isin(publicReleaseSamples)]
            text = process_functions.removeFloat(segDf)
            with open(SEG_PATH, "w") as segFile:
                segFile.write(text)
            storeFile(syn, SEG_PATH, PUBLIC_RELEASE_PREVIEW, ANONYMIZE_CENTER_DF, genie_version, name="genie_public_data_cna_hg19.seg")
        elif entName == "data_gene_matrix.txt":
            pass
            # This file was processed above because it had to be used for generating caselists
            # panel = syn.get(entId, followLink=True)
            # panelDf = pd.read_csv(panel.path, sep="\t")
            # panelDf = panelDf[panelDf['SAMPLE_ID'].isin(publicReleaseSamples)]
            # panelDf.to_csv(DATA_GENE_PANEL_PATH,sep="\t",index=False)
            # storeFile(syn, DATA_GENE_PANEL_PATH, PUBLIC_RELEASE_PREVIEW, ANONYMIZE_CENTER_DF, genie_version, name="data_gene_matrix.txt")
        elif entName == "genie_combined.bed":
            bed = syn.get(entId, followLink=True)
            bedDf = pd.read_csv(bed.path, sep="\t")
            bedDf = bedDf[bedDf.SEQ_ASSAY_ID.isin(allClin.SEQ_ASSAY_ID)]
            bedDf.to_csv(COMBINED_BED_PATH,sep="\t",index=False)
            storeFile(syn, COMBINED_BED_PATH, PUBLIC_RELEASE_PREVIEW, ANONYMIZE_CENTER_DF, genie_version, name="genie_combined.bed")
        elif entName in ["data_clinical_sample.txt", "data_clinical_patient.txt"] or entName.endswith(".html"):
            continue
        elif entName.startswith("data_gene_panel"):
            genePanel = syn.get(entId, followLink=True)
            #Create new gene panel naming and store
            fileName = os.path.basename(genePanel.path)
            newFileList = fileName.split("_")
            newFileList[-1] = genie_version + ".txt"
            newFileName = "_".join(newFileList)
            genePanelPath = os.path.join(db_to_staging.GENIE_RELEASE_DIR, newFileName)
            shutil.copy(genePanel.path, genePanelPath)
            del newFileList[-1]
            entName = "_".join(newFileList)
            entName = entName + ".txt"
            genePanelEntities.append(storeFile(syn, genePanelPath, PUBLIC_RELEASE_PREVIEW, ANONYMIZE_CENTER_DF, genie_version, name=entName))
        else:
            ent = syn.get(entId, followLink=True, downloadFile=False)
            copiedId = synapseutils.copy(syn, ent, PUBLIC_RELEASE_PREVIEW, version=ent.versionNumber, updateExisting=True, setProvenance = None, skipCopyAnnotations=True)
            copiedEnt = syn.get(copiedId[ent.id],downloadFile=False)
            #Set version comment
            copiedEnt.versionComment=genie_version
            syn.store(copiedEnt, forceVersion=False)
    return caseListEntities, genePanelEntities
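
The copy-then-version-comment step at the end of the loop is the reusable pattern here; a standalone sketch with placeholder IDs and a hypothetical release tag:

import synapseclient
import synapseutils

syn = synapseclient.login()
ent = syn.get("syn123", downloadFile=False)  # placeholder source entity
copied = synapseutils.copy(syn, ent, "syn456",  # placeholder destination
                           version=ent.versionNumber, updateExisting=True,
                           setProvenance=None, skipCopyAnnotations=True)
copied_ent = syn.get(copied[ent.id], downloadFile=False)
copied_ent.versionComment = "1.0-public"  # hypothetical version tag
syn.store(copied_ent, forceVersion=False)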
Example #16
 def copy(self, entity, destinationId, skipCopyWikiPage=False, skipCopyAnnotations=False, **kwargs):
     return synapseutils.copy(self.syn, entity, destinationId, skipCopyWikiPage, skipCopyAnnotations, **kwargs)
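
In context this is a thin passthrough; a self-contained sketch of the kind of wrapper class it implies (the class name is an assumption):

import synapseclient
import synapseutils

class SynapseProxy:  # hypothetical host class for the method above
    def __init__(self, syn):
        self.syn = syn

    def copy(self, entity, destinationId,
             skipCopyWikiPage=False, skipCopyAnnotations=False, **kwargs):
        return synapseutils.copy(self.syn, entity, destinationId,
                                 skipCopyWikiPage, skipCopyAnnotations,
                                 **kwargs)

mapping = SynapseProxy(synapseclient.login()).copy("syn123", "syn456")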
Example #17
def test_copyWiki():
    # Create a Project
    project_entity = syn.store(Project(name=str(uuid.uuid4())))

    schedule_for_cleanup(project_entity.id)

    folder_entity = syn.store(
        Folder(name=str(uuid.uuid4()), parent=project_entity))
    schedule_for_cleanup(folder_entity.id)
    second_folder = syn.store(
        Folder(name=str(uuid.uuid4()), parent=project_entity))
    schedule_for_cleanup(second_folder.id)
    third_folder = syn.store(
        Folder(name=str(uuid.uuid4()), parent=project_entity))
    schedule_for_cleanup(third_folder.id)

    filename = utils.make_bogus_data_file()
    attachname = utils.make_bogus_data_file()

    schedule_for_cleanup(filename)
    file_entity = syn.store(File(filename, parent=folder_entity))
    nested_folder = syn.store(
        Folder(name=str(uuid.uuid4()), parent=folder_entity))
    second_file = syn.store(File(filename, parent=nested_folder))

    schedule_for_cleanup(file_entity.id)
    schedule_for_cleanup(nested_folder.id)
    schedule_for_cleanup(second_file.id)

    fileWiki = Wiki(owner=second_file, title='A Test Wiki', markdown="Test")
    fileWiki = syn.store(fileWiki)

    # Create mock wiki
    md = """
    This is a test wiki
    =======================

    Blabber jabber blah blah boo.
    %s
    %s
    """ % (file_entity.id, second_file.id)

    wiki = Wiki(owner=project_entity,
                title='A Test Wiki',
                markdown=md,
                attachments=[attachname])
    wiki = syn.store(wiki)

    # Create a Wiki sub-page
    subwiki = Wiki(owner=project_entity,
                   title='A sub-wiki',
                   markdown='%s' % file_entity.id,
                   parentWikiId=wiki.id)
    subwiki = syn.store(subwiki)

    second_md = """
    Testing internal links
    ======================

    [test](#!Synapse:%s/wiki/%s)

    %s)
    """ % (project_entity.id, subwiki.id, second_file.id)

    sub_subwiki = Wiki(owner=project_entity,
                       title='A sub-sub-wiki',
                       markdown=second_md,
                       parentWikiId=subwiki.id,
                       attachments=[attachname])
    sub_subwiki = syn.store(sub_subwiki)

    # Copy wiki to second project
    second_project = syn.store(Project(name=str(uuid.uuid4())))
    schedule_for_cleanup(second_project.id)

    fileMapping = synapseutils.copy(syn,
                                    project_entity,
                                    second_project.id,
                                    skipCopyWikiPage=True)

    # Test: copyWikiPage = False
    assert_raises(SynapseHTTPError, syn.getWiki, second_project.id)

    first_headers = syn.getWikiHeaders(project_entity)
    second_headers = synapseutils.copyWiki(syn,
                                           project_entity.id,
                                           second_project.id,
                                           entityMap=fileMapping)

    mapping = dict()

    # Test: Check that all wikis were copied correctly with the correct mapping
    for index, info in enumerate(second_headers):
        mapping[first_headers[index]['id']] = info['id']
        assert_equals(first_headers[index]['title'], info['title'])
        if info.get('parentId', None) is not None:
            # Check if parent Ids are mapping correctly in the copied Wikis
            assert_equals(info['parentId'],
                          mapping[first_headers[index]['parentId']])

    # Test: Check that all wikis have the correct attachments and have correct internal synapse link/file mapping
    for index, info in enumerate(second_headers):
        # Check if markdown is the correctly mapped
        orig_wikiPage = syn.getWiki(project_entity, first_headers[index]['id'])
        new_wikiPage = syn.getWiki(second_project, info['id'])
        s = orig_wikiPage.markdown
        for oldWikiId in mapping.keys():
            oldProjectAndWikiId = "%s/wiki/%s" % (project_entity.id, oldWikiId)
            newProjectAndWikiId = "%s/wiki/%s" % (second_project.id,
                                                  mapping[oldWikiId])
            s = re.sub(oldProjectAndWikiId, newProjectAndWikiId, s)
        for oldFileId in fileMapping.keys():
            s = re.sub(oldFileId, fileMapping[oldFileId], s)
        assert_equals(s, new_wikiPage.markdown)
        orig_attach = syn.getWikiAttachments(orig_wikiPage)
        new_attach = syn.getWikiAttachments(new_wikiPage)

        orig_file = [
            i['fileName'] for i in orig_attach if i['concreteType'] !=
            "org.sagebionetworks.repo.model.file.PreviewFileHandle"
        ]
        new_file = [
            i['fileName'] for i in new_attach if i['concreteType'] !=
            "org.sagebionetworks.repo.model.file.PreviewFileHandle"
        ]

        # check that attachment file names are the same
        assert_equals(orig_file, new_file)

    # Test: copyWikiPage = True (Default) (Should copy all wikis including wikis on files)
    third_project = syn.store(Project(name=str(uuid.uuid4())))
    schedule_for_cleanup(third_project.id)

    copiedFile = synapseutils.copy(syn, second_file, third_project.id)
    copiedWiki = syn.getWiki(copiedFile[second_file.id])
    assert_equals(copiedWiki.title, fileWiki.title)
    assert_equals(copiedWiki.markdown, fileWiki.markdown)

    # Test: entitySubPageId
    third_header = synapseutils.copyWiki(syn,
                                         project_entity.id,
                                         third_project.id,
                                         entitySubPageId=sub_subwiki.id,
                                         destinationSubPageId=None,
                                         updateLinks=False,
                                         updateSynIds=False,
                                         entityMap=fileMapping)
    test_ent_subpage = syn.getWiki(third_project.id, third_header[0]['id'])

    # Test: No internal links updated
    assert_equals(test_ent_subpage.markdown, sub_subwiki.markdown)
    assert_equals(test_ent_subpage.title, sub_subwiki.title)

    # Test: destinationSubPageId
    fourth_header = synapseutils.copyWiki(
        syn,
        project_entity.id,
        third_project.id,
        entitySubPageId=subwiki.id,
        destinationSubPageId=test_ent_subpage.id,
        updateLinks=False,
        updateSynIds=False,
        entityMap=fileMapping)
    temp = syn.getWiki(third_project.id, fourth_header[0]['id'])
    # There is a known issue where some copied title pages come back blank;
    # this still needs to be addressed
    assert_equals(temp.title, subwiki.title)

    assert_equals(temp.markdown, subwiki.markdown)

    temp = syn.getWiki(third_project.id, fourth_header[1]['id'])
    assert_equals(temp.title, sub_subwiki.title)
    assert_equals(temp.markdown, sub_subwiki.markdown)
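
The integration test above boils down to a two-step recipe: copy the entities without their wikis, then copy the wiki tree while remapping internal links through the returned entity mapping. A sketch with placeholder IDs:

import synapseclient
import synapseutils

syn = synapseclient.login()
mapping = synapseutils.copy(syn, "syn123", "syn456", skipCopyWikiPage=True)
headers = synapseutils.copyWiki(syn, "syn123", "syn456", entityMap=mapping)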
Example #18
def test_copy():
    """Tests the copy function"""
    # Create a Project
    project_entity = syn.store(Project(name=str(uuid.uuid4())))
    schedule_for_cleanup(project_entity.id)
    # Create two Folders in Project
    folder_entity = syn.store(
        Folder(name=str(uuid.uuid4()), parent=project_entity))
    second_folder = syn.store(
        Folder(name=str(uuid.uuid4()), parent=project_entity))
    third_folder = syn.store(
        Folder(name=str(uuid.uuid4()), parent=project_entity))
    schedule_for_cleanup(folder_entity.id)
    schedule_for_cleanup(second_folder.id)
    schedule_for_cleanup(third_folder.id)

    # Annotations and provenance
    repo_url = 'https://github.com/Sage-Bionetworks/synapsePythonClient'
    annos = {'test': ['hello_world']}
    prov = Activity(name="test", used=repo_url)
    # Create, upload, and set annotations/provenance on a file in Folder
    filename = utils.make_bogus_data_file()
    schedule_for_cleanup(filename)
    file_entity = syn.store(File(filename, parent=folder_entity))
    externalURL_entity = syn.store(
        File(repo_url, name='rand', parent=folder_entity, synapseStore=False))
    syn.setAnnotations(file_entity, annos)
    syn.setAnnotations(externalURL_entity, annos)
    syn.setProvenance(externalURL_entity.id, prov)
    schedule_for_cleanup(file_entity.id)
    schedule_for_cleanup(externalURL_entity.id)
    # ------------------------------------
    # TEST COPY FILE
    # ------------------------------------
    output = synapseutils.copy(syn,
                               file_entity.id,
                               destinationId=project_entity.id)
    output_URL = synapseutils.copy(syn,
                                   externalURL_entity.id,
                                   destinationId=project_entity.id,
                                   skipCopyAnnotations=True)

    # Verify that our copied files are identical
    copied_ent = syn.get(output[file_entity.id])
    copied_URL_ent = syn.get(output_URL[externalURL_entity.id],
                             downloadFile=False)

    copied_ent_annot = syn.getAnnotations(copied_ent)
    copied_url_annot = syn.getAnnotations(copied_URL_ent)
    copied_prov = syn.getProvenance(copied_ent)
    copied_url_prov = syn.getProvenance(copied_URL_ent)
    schedule_for_cleanup(copied_ent.id)
    schedule_for_cleanup(copied_URL_ent.id)

    # TEST: setProvenance = "traceback" (the default)
    assert_equals(copied_prov['used'][0]['reference']['targetId'],
                  file_entity.id)
    assert_equals(copied_url_prov['used'][0]['reference']['targetId'],
                  externalURL_entity.id)

    # TEST: Make sure copied files are the same
    assert_equals(copied_ent_annot, annos)
    assert_equals(copied_ent.dataFileHandleId, file_entity.dataFileHandleId)

    # TEST: Make sure copied URLs are the same
    assert_equals(copied_url_annot, {})
    assert_equals(copied_URL_ent.externalURL, repo_url)
    assert_equals(copied_URL_ent.name, 'rand')
    assert_equals(copied_URL_ent.dataFileHandleId,
                  externalURL_entity.dataFileHandleId)

    # TEST: Throw error if file is copied to a folder/project that has a file with the same filename
    assert_raises(ValueError,
                  synapseutils.copy,
                  syn,
                  project_entity.id,
                  destinationId=project_entity.id)
    assert_raises(ValueError,
                  synapseutils.copy,
                  syn,
                  file_entity.id,
                  destinationId=project_entity.id)
    assert_raises(ValueError,
                  synapseutils.copy,
                  syn,
                  file_entity.id,
                  destinationId=third_folder.id,
                  setProvenance="gib")
    assert_raises(ValueError,
                  synapseutils.copy,
                  syn,
                  file_entity.id,
                  destinationId=file_entity.id)

    # Test: setProvenance = None
    output = synapseutils.copy(syn,
                               file_entity.id,
                               destinationId=second_folder.id,
                               setProvenance=None)
    assert_raises(SynapseHTTPError, syn.getProvenance, output[file_entity.id])
    schedule_for_cleanup(output[file_entity.id])

    # Test: setProvenance = Existing
    output_URL = synapseutils.copy(syn,
                                   externalURL_entity.id,
                                   destinationId=second_folder.id,
                                   setProvenance="existing")
    output_prov = syn.getProvenance(output_URL[externalURL_entity.id])
    schedule_for_cleanup(output_URL[externalURL_entity.id])
    assert_equals(output_prov['name'], prov['name'])
    assert_equals(output_prov['used'], prov['used'])

    # ------------------------------------
    # TEST COPY LINKS
    # ------------------------------------
    second_file = utils.make_bogus_data_file()
    # schedule_for_cleanup(filename)
    second_file_entity = syn.store(File(second_file, parent=project_entity))
    link_entity = Link(second_file_entity.id, parent=folder_entity.id)
    link_entity = syn.store(link_entity)

    copied_link = synapseutils.copy(syn,
                                    link_entity.id,
                                    destinationId=second_folder.id)
    old = syn.get(link_entity.id, followLink=False)
    new = syn.get(copied_link[link_entity.id], followLink=False)
    assert_equals(old.linksTo['targetId'], new.linksTo['targetId'])

    schedule_for_cleanup(second_file_entity.id)
    schedule_for_cleanup(link_entity.id)
    schedule_for_cleanup(copied_link[link_entity.id])

    time.sleep(3)

    assert_raises(ValueError,
                  synapseutils.copy,
                  syn,
                  link_entity.id,
                  destinationId=second_folder.id)

    # ------------------------------------
    # TEST COPY TABLE
    # ------------------------------------
    second_project = syn.store(Project(name=str(uuid.uuid4())))
    schedule_for_cleanup(second_project.id)
    cols = [
        Column(name='n', columnType='DOUBLE', maximumSize=50),
        Column(name='c', columnType='STRING', maximumSize=50),
        Column(name='i', columnType='INTEGER')
    ]
    data = [[2.1, 'foo', 10], [2.2, 'bar', 20], [2.3, 'baz', 30]]

    schema = syn.store(
        Schema(name='Testing', columns=cols, parent=project_entity.id))
    syn.store(RowSet(schema=schema, rows=[Row(r) for r in data]))

    table_map = synapseutils.copy(syn,
                                  schema.id,
                                  destinationId=second_project.id)
    copied_table = syn.tableQuery('select * from %s' % table_map[schema.id])
    rows = copied_table.asRowSet()['rows']
    # TEST: Check if all values are the same
    for i, row in enumerate(rows):
        assert_equals(row['values'], data[i])

    assert_raises(ValueError,
                  synapseutils.copy,
                  syn,
                  schema.id,
                  destinationId=second_project.id)

    schedule_for_cleanup(schema.id)
    schedule_for_cleanup(table_map[schema.id])

    # ------------------------------------
    # TEST COPY FOLDER
    # ------------------------------------
    mapping = synapseutils.copy(syn,
                                folder_entity.id,
                                destinationId=second_project.id)
    for i in mapping:
        old = syn.get(i, downloadFile=False)
        new = syn.get(mapping[i], downloadFile=False)
        assert_equals(old.name, new.name)
        assert_equals(old.annotations, new.annotations)
        assert_equals(old.concreteType, new.concreteType)

    assert_raises(ValueError,
                  synapseutils.copy,
                  syn,
                  folder_entity.id,
                  destinationId=second_project.id)
    # TEST: Throw error if excludeTypes isn't in file, link and table or isn't a list
    assert_raises(ValueError,
                  synapseutils.copy,
                  syn,
                  second_folder.id,
                  destinationId=second_project.id,
                  excludeTypes=["foo"])
    assert_raises(ValueError,
                  synapseutils.copy,
                  syn,
                  second_folder.id,
                  destinationId=second_project.id,
                  excludeTypes="file")
    # TEST: excludeTypes=["file", "table", "link"], only the folder itself is copied
    second = synapseutils.copy(syn,
                               second_folder.id,
                               destinationId=second_project.id,
                               excludeTypes=["file", "table", "link"])

    copied_folder = syn.get(second[second_folder.id])
    assert_equals(copied_folder.name, second_folder.name)
    assert_equals(len(second), 1)
    # TEST: Make sure error is thrown if foldername already exists

    assert_raises(ValueError,
                  synapseutils.copy,
                  syn,
                  second_folder.id,
                  destinationId=second_project.id)

    # ------------------------------------
    # TEST COPY PROJECT
    # ------------------------------------
    third_project = syn.store(Project(name=str(uuid.uuid4())))
    schedule_for_cleanup(third_project.id)

    mapping = synapseutils.copy(syn,
                                project_entity.id,
                                destinationId=third_project.id)
    for i in mapping:
        old = syn.get(i, downloadFile=False)
        new = syn.get(mapping[i], downloadFile=False)
        if not isinstance(old, Project):
            assert_equals(old.name, new.name)
        assert_equals(old.annotations, new.annotations)
        assert_equals(old.concreteType, new.concreteType)

    # TEST: Can't copy project to a folder
    assert_raises(ValueError,
                  synapseutils.copy,
                  syn,
                  project_entity.id,
                  destinationId=second_folder.id)
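
The provenance branches exercised above correspond to the three accepted setProvenance values; a sketch with placeholder IDs:

import synapseclient
import synapseutils

syn = synapseclient.login()
# "traceback" (the default): provenance points back at the source entity
synapseutils.copy(syn, "syn123", destinationId="syn456")
# "existing": the source entity's own provenance record is carried over
synapseutils.copy(syn, "syn123", destinationId="syn457",
                  setProvenance="existing")
# None: the copy is stored with no provenance at all
synapseutils.copy(syn, "syn123", destinationId="syn458",
                  setProvenance=None)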
Example #19
def consortiumToPublic(
    syn,
    processingDate,
    genie_version,
    releaseId,
    databaseSynIdMappingDf,
    publicReleaseCutOff=365,
):
    cna_path = os.path.join(database_to_staging.GENIE_RELEASE_DIR,
                            "data_CNA_%s.txt" % genie_version)
    clinical_path = os.path.join(database_to_staging.GENIE_RELEASE_DIR,
                                 "data_clinical_%s.txt" % genie_version)
    clinical_sample_path = os.path.join(
        database_to_staging.GENIE_RELEASE_DIR,
        "data_clinical_sample_%s.txt" % genie_version,
    )
    clinical_patient_path = os.path.join(
        database_to_staging.GENIE_RELEASE_DIR,
        "data_clinical_patient_%s.txt" % genie_version,
    )
    data_gene_panel_path = os.path.join(
        database_to_staging.GENIE_RELEASE_DIR,
        "data_gene_matrix_%s.txt" % genie_version)
    mutations_path = os.path.join(
        database_to_staging.GENIE_RELEASE_DIR,
        "data_mutations_extended_%s.txt" % genie_version,
    )
    fusions_path = os.path.join(database_to_staging.GENIE_RELEASE_DIR,
                                "data_fusions_%s.txt" % genie_version)
    seg_path = os.path.join(
        database_to_staging.GENIE_RELEASE_DIR,
        "genie_public_data_cna_hg19_%s.seg" % genie_version,
    )
    combined_bed_path = os.path.join(database_to_staging.GENIE_RELEASE_DIR,
                                     "genie_combined_%s.bed" % genie_version)

    if not os.path.exists(database_to_staging.GENIE_RELEASE_DIR):
        os.mkdir(database_to_staging.GENIE_RELEASE_DIR)
    if not os.path.exists(database_to_staging.CASE_LIST_PATH):
        os.mkdir(database_to_staging.CASE_LIST_PATH)

    # public release preview
    public_release_preview = databaseSynIdMappingDf["Id"][
        databaseSynIdMappingDf["Database"] == "public"].values[0]
    public_release_preview_caselist = database_to_staging.find_caselistid(
        syn, public_release_preview)

    #######################################################################
    # Sponsored projects filter
    #######################################################################
    # if before release date -> go into staging consortium
    # if after date -> go into public
    # sponsoredReleaseDate = syn.tableQuery('SELECT * FROM syn8545108')
    # sponsoredReleaseDateDf = sponsoredReleaseDate.asDataFrame()
    # sponsoredProjectSamples = syn.tableQuery('SELECT * FROM syn8545106')
    # sponsoredProjectSamplesDf = sponsoredProjectSamples.asDataFrame()
    # sponsoredProjectsDf = sponsoredProjectSamplesDf.merge(
    #     sponsoredReleaseDateDf, left_on="sponsoredProject",
    #     right_on="sponsoredProjects")
    # dates = sponsoredProjectsDf['releaseDate'].apply(
    #     lambda date: datetime.datetime.strptime(date, '%b-%Y'))
    # publicReleaseSamples = sponsoredProjectsDf['genieSampleId'][
    #     dates < processingDate]
    #######################################################################

    # SEQ_DATE filter
    # Jun-2015, given processing date (today) -> public release
    # (processing date - Jun-2015 > 12 months)
    consortiumReleaseWalk = synapseutils.walk(syn, releaseId)

    consortiumRelease = next(consortiumReleaseWalk)
    for filename, synid in consortiumRelease[2]:
        if filename == "data_clinical.txt":
            clinical = syn.get(synid, followLink=True)
        elif filename == "data_gene_matrix.txt":
            gene_matrix = syn.get(synid, followLink=True)
        elif filename == "assay_information.txt":
            assay_info = syn.get(synid, followLink=True)

    clinicalDf = pd.read_csv(clinical.path, sep="\t", comment="#")
    gene_matrixdf = pd.read_csv(gene_matrix.path, sep="\t")

    removeForPublicSamples = process_functions.seqDateFilter(
        clinicalDf, processingDate, publicReleaseCutOff)
    logger.info("SAMPLE CLASS FILTER")
    remove_sc_samples = database_to_staging.sample_class_filter(
        clinical_df=clinicalDf)
    removeForPublicSamples = list(
        set(removeForPublicSamples).union(remove_sc_samples))
    # comment back in when public release filter back on
    # publicReleaseSamples = publicReleaseSamples.append(keepForPublicSamples)
    # Make sure all null oncotree codes are removed
    clinicalDf = clinicalDf[~clinicalDf["ONCOTREE_CODE"].isnull()]
    publicReleaseSamples = clinicalDf.SAMPLE_ID[~clinicalDf.SAMPLE_ID.
                                                isin(removeForPublicSamples)]

    existing_seq_dates = clinicalDf.SEQ_DATE[clinicalDf.SAMPLE_ID.isin(
        publicReleaseSamples)]

    logger.info("SEQ_DATES for public release: " +
                ", ".join(set(existing_seq_dates.astype(str))))

    # Clinical release scope filter
    # If consortium -> Don't release to public
    clinicalReleaseScope = syn.tableQuery(
        "SELECT * FROM syn8545211 where releaseScope = 'public'")
    publicRelease = clinicalReleaseScope.asDataFrame()

    allClin = clinicalDf[clinicalDf["SAMPLE_ID"].isin(publicReleaseSamples)]
    allClin.to_csv(clinical_path, sep="\t", index=False)

    gene_matrixdf = gene_matrixdf[gene_matrixdf["SAMPLE_ID"].isin(
        publicReleaseSamples)]
    gene_matrixdf.to_csv(data_gene_panel_path, sep="\t", index=False)
    storeFile(
        syn,
        data_gene_panel_path,
        public_release_preview,
        genie_version,
        name="data_gene_matrix.txt",
    )
    storeFile(
        syn,
        clinical_path,
        public_release_preview,
        genie_version,
        name="data_clinical.txt",
    )

    create_case_lists.main(
        clinical_path,
        assay_info.path,
        database_to_staging.CASE_LIST_PATH,
        "genie_public",
    )

    caseListFiles = os.listdir(database_to_staging.CASE_LIST_PATH)
    caseListEntities = []
    for casePath in caseListFiles:
        casePath = os.path.join(database_to_staging.CASE_LIST_PATH, casePath)
        caseListEntities.append(
            storeFile(syn, casePath, public_release_preview_caselist,
                      genie_version))

    # Grab mapping table to fill in clinical headers
    mapping_table = syn.tableQuery("SELECT * FROM syn9621600")
    mapping = mapping_table.asDataFrame()
    genePanelEntities = []
    for entName, entId in consortiumRelease[2]:
        # skip files to convert
        if (entName.startswith("data_linear") or "meta_" in entName
                or entName.endswith(".html") or entName in [
                    "data_clinical_sample.txt",
                    "data_gene_matrix.txt",
                    "data_clinical_patient.txt",
                    "data_guide.pdf",
                    "release_notes.pdf",
                    "samples_to_retract.csv",
                    "non_somatic.csv",
                    "snv_as_dnp.csv",
                    "snv_as_onp.csv",
                ]):
            # data_gene_matrix was processed above because it had to be
            # used for generating caselists
            continue
        if entName == "data_clinical.txt":
            patientCols = publicRelease["fieldName"][publicRelease["level"] ==
                                                     "patient"].tolist()
            sampleCols = ["PATIENT_ID"]
            sampleCols.extend(publicRelease["fieldName"][publicRelease["level"]
                                                         == "sample"].tolist())
            # clinicalDf is defined above
            clinicalDf = clinicalDf[clinicalDf["SAMPLE_ID"].isin(
                publicReleaseSamples)]

            # Delete columns that are private scope
            # for private in privateRelease:
            #   del clinicalDf[private]
            process_functions.addClinicalHeaders(
                clinicalDf,
                mapping,
                patientCols,
                sampleCols,
                clinical_sample_path,
                clinical_patient_path,
            )

            storeFile(
                syn,
                clinical_sample_path,
                public_release_preview,
                genie_version,
                name="data_clinical_sample.txt",
            )
            storeFile(
                syn,
                clinical_patient_path,
                public_release_preview,
                genie_version,
                name="data_clinical_patient.txt",
            )

        elif "mutation" in entName:
            mutation = syn.get(entId, followLink=True)
            mutationDf = pd.read_csv(mutation.path, sep="\t", comment="#")
            # mutationDf = commonVariantFilter(mutationDf)
            mutationDf["FILTER"] = "PASS"
            mutationDf = mutationDf[mutationDf["Tumor_Sample_Barcode"].isin(
                publicReleaseSamples)]
            text = process_functions.removeFloat(mutationDf)
            with open(mutations_path, "w") as f:
                f.write(text)
            storeFile(
                syn,
                mutations_path,
                public_release_preview,
                genie_version,
                name="data_mutations_extended.txt",
            )

        elif "fusion" in entName:
            fusion = syn.get(entId, followLink=True)
            fusionDf = pd.read_csv(fusion.path, sep="\t")
            fusionDf = fusionDf[fusionDf["Tumor_Sample_Barcode"].isin(
                publicReleaseSamples)]
            fusionDf.to_csv(fusions_path, sep="\t", index=False)
            storeFile(
                syn,
                fusions_path,
                public_release_preview,
                genie_version,
                name="data_fusions.txt",
            )
        elif "CNA" in entName:
            cna = syn.get(entId, followLink=True)
            cnaDf = pd.read_csv(cna.path, sep="\t")
            cna_columns = pd.concat(
                [publicReleaseSamples,
                 pd.Series("Hugo_Symbol")])
            # parse out the CNA columns to keep
            cnaDf = cnaDf[cnaDf.columns[cnaDf.columns.isin(cna_columns)]]
            text = process_functions.removeFloat(cnaDf)
            text = (text.replace("\t\t",
                                 "\tNA\t").replace("\t\t", "\tNA\t").replace(
                                     "\t\n", "\tNA\n"))
            with open(cna_path, "w") as cnaFile:
                cnaFile.write(text)
            storeFile(
                syn,
                cna_path,
                public_release_preview,
                genie_version,
                name="data_CNA.txt",
            )
        elif entName.endswith(".seg"):
            seg = syn.get(entId, followLink=True)
            segDf = pd.read_csv(seg.path, sep="\t")
            segDf = segDf[segDf["ID"].isin(publicReleaseSamples)]
            text = process_functions.removeFloat(segDf)
            with open(seg_path, "w") as segFile:
                segFile.write(text)
            storeFile(
                syn,
                seg_path,
                public_release_preview,
                genie_version,
                name="genie_public_data_cna_hg19.seg",
            )
        elif entName == "genomic_information.txt":
            bed = syn.get(entId, followLink=True)
            bedDf = pd.read_csv(bed.path, sep="\t")
            bedDf = bedDf[bedDf.SEQ_ASSAY_ID.isin(allClin.SEQ_ASSAY_ID)]
            bedDf.to_csv(combined_bed_path, sep="\t", index=False)
            storeFile(
                syn,
                combined_bed_path,
                public_release_preview,
                genie_version,
                name="genomic_information.txt",
            )
        elif entName.startswith("data_gene_panel"):
            genePanel = syn.get(entId, followLink=True)
            # Create new gene panel naming and store
            fileName = os.path.basename(genePanel.path)
            newFileList = fileName.split("_")
            newFileList[-1] = genie_version + ".txt"
            newFileName = "_".join(newFileList)
            genePanelPath = os.path.join(database_to_staging.GENIE_RELEASE_DIR,
                                         newFileName)
            shutil.copy(genePanel.path, genePanelPath)
            del newFileList[-1]
            entName = "_".join(newFileList)
            entName = entName + ".txt"
            genepanel_ent = storeFile(syn,
                                      genePanelPath,
                                      public_release_preview,
                                      genie_version,
                                      name=entName)
            genePanelEntities.append(genepanel_ent)
        else:
            ent = syn.get(entId, followLink=True, downloadFile=False)
            copiedId = synapseutils.copy(
                syn,
                ent,
                public_release_preview,
                version=ent.versionNumber,
                updateExisting=True,
                setProvenance=None,
                skipCopyAnnotations=True,
            )
            copiedEnt = syn.get(copiedId[ent.id], downloadFile=False)
            # Set version comment
            copiedEnt.versionComment = genie_version
            syn.store(copiedEnt, forceVersion=False)
    return caseListEntities, genePanelEntities
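
# ----------------------------------------------------------------------
# A minimal sketch of the copy-and-stamp pattern used in the final `else`
# branch above, assuming a logged-in client; the Synapse ids ("syn123",
# "syn456") and the release tag are hypothetical placeholders.
# ----------------------------------------------------------------------
import synapseclient
import synapseutils

syn = synapseclient.login()  # assumes cached credentials

source = syn.get("syn123", downloadFile=False)  # hypothetical source file
mapping = synapseutils.copy(
    syn,
    source,
    "syn456",  # hypothetical release folder
    version=source.versionNumber,
    updateExisting=True,
    setProvenance=None,
    skipCopyAnnotations=True,
)
# copy() returns a dict mapping source ids to the ids of the new copies
copied = syn.get(mapping[source.id], downloadFile=False)
copied.versionComment = "example-release"  # hypothetical version tag
syn.store(copied, forceVersion=False)
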
def test_copy():
    """Tests the copy function"""
    # Create a Project
    project_entity = syn.store(Project(name=str(uuid.uuid4())))
    schedule_for_cleanup(project_entity.id)
    acl = syn.setPermissions(
        project_entity,
        other_user['principalId'],
        accessType=['READ', 'CREATE', 'UPDATE', 'DOWNLOAD'])
    # Create two Folders in Project
    folder_entity = syn.store(
        Folder(name=str(uuid.uuid4()), parent=project_entity))
    second_folder = syn.store(
        Folder(name=str(uuid.uuid4()), parent=project_entity))
    third_folder = syn.store(
        Folder(name=str(uuid.uuid4()), parent=project_entity))
    schedule_for_cleanup(folder_entity.id)
    schedule_for_cleanup(second_folder.id)
    schedule_for_cleanup(third_folder.id)

    # Annotations and provenance
    repo_url = 'https://github.com/Sage-Bionetworks/synapsePythonClient'
    annots = {'test': ['hello_world']}
    prov = Activity(name="test", used=repo_url)
    # Create, upload, and set annotations/provenance on a file in Folder
    filename = utils.make_bogus_data_file()
    schedule_for_cleanup(filename)
    file_entity = syn.store(File(filename, parent=folder_entity))
    externalURL_entity = syn.store(
        File(repo_url, name='rand', parent=folder_entity, synapseStore=False))
    syn.setAnnotations(file_entity, annots)
    syn.setAnnotations(externalURL_entity, annots)
    syn.setProvenance(externalURL_entity.id, prov)
    schedule_for_cleanup(file_entity.id)
    schedule_for_cleanup(externalURL_entity.id)
    # ------------------------------------
    # TEST COPY FILE
    # ------------------------------------
    output = synapseutils.copy(syn,
                               file_entity.id,
                               destinationId=project_entity.id)
    output_URL = synapseutils.copy(syn,
                                   externalURL_entity.id,
                                   destinationId=project_entity.id,
                                   skipCopyAnnotations=True)

    # Verify that our copied files are identical
    copied_ent = syn.get(output[file_entity.id])
    copied_URL_ent = syn.get(output_URL[externalURL_entity.id],
                             downloadFile=False)

    copied_ent_annot = syn.getAnnotations(copied_ent)
    copied_url_annot = syn.getAnnotations(copied_URL_ent)
    copied_prov = syn.getProvenance(copied_ent)
    copied_url_prov = syn.getProvenance(copied_URL_ent)
    schedule_for_cleanup(copied_ent.id)
    schedule_for_cleanup(copied_URL_ent.id)

    # TEST: setProvenance = Traceback
    print("Test: setProvenance = Traceback")
    assert copied_prov['used'][0]['reference']['targetId'] == file_entity.id
    assert copied_url_prov['used'][0]['reference'][
        'targetId'] == externalURL_entity.id

    # TEST: Make sure copied files are the same
    assert copied_ent_annot == annots
    assert copied_ent.dataFileHandleId == file_entity.dataFileHandleId

    # TEST: Make sure copied URLs are the same
    assert copied_url_annot == {}
    assert copied_URL_ent.externalURL == repo_url
    assert copied_URL_ent.name == 'rand'
    assert copied_URL_ent.dataFileHandleId == externalURL_entity.dataFileHandleId

    # TEST: Throw error if file is copied to a folder/project that has a file with the same filename
    assert_raises(ValueError,
                  synapseutils.copy,
                  syn,
                  project_entity.id,
                  destinationId=project_entity.id)
    assert_raises(ValueError,
                  synapseutils.copy,
                  syn,
                  file_entity.id,
                  destinationId=project_entity.id)
    assert_raises(ValueError,
                  synapseutils.copy,
                  syn,
                  file_entity.id,
                  destinationId=third_folder.id,
                  setProvenance="gib")
    assert_raises(ValueError,
                  synapseutils.copy,
                  syn,
                  file_entity.id,
                  destinationId=file_entity.id)

    print("Test: setProvenance = None")
    output = synapseutils.copy(syn,
                               file_entity.id,
                               destinationId=second_folder.id,
                               setProvenance=None)
    assert_raises(SynapseHTTPError, syn.getProvenance, output[file_entity.id])
    schedule_for_cleanup(output[file_entity.id])

    print("Test: setProvenance = Existing")
    output_URL = synapseutils.copy(syn,
                                   externalURL_entity.id,
                                   destinationId=second_folder.id,
                                   setProvenance="existing")
    output_prov = syn.getProvenance(output_URL[externalURL_entity.id])
    schedule_for_cleanup(output_URL[externalURL_entity.id])
    assert output_prov['name'] == prov['name']
    assert output_prov['used'] == prov['used']

    if 'username' not in other_user or 'password' not in other_user:
        sys.stderr.write(
            '\nWarning: no test authentication configured. Skipping the test '
            'that copies a file made by another user.\n'
        )
        return

    try:
        print(
            "Test: Other user copy should result in different data file handle"
        )
        syn_other = synapseclient.Synapse(skip_checks=True)
        syn_other.login(other_user['username'], other_user['password'])

        output = synapseutils.copy(syn_other,
                                   file_entity.id,
                                   destinationId=third_folder.id)
        new_copied_ent = syn.get(output[file_entity.id])
        new_copied_ent_annot = syn.getAnnotations(new_copied_ent)
        schedule_for_cleanup(new_copied_ent.id)

        copied_URL_ent.externalURL = "https://www.google.com"
        copied_URL_ent = syn.store(copied_URL_ent)
        output = synapseutils.copy(syn_other,
                                   copied_URL_ent.id,
                                   destinationId=third_folder.id,
                                   version=1)
        new_copied_URL = syn.get(output[copied_URL_ent.id], downloadFile=False)
        schedule_for_cleanup(new_copied_URL.id)

        assert new_copied_ent_annot == annots
        assert new_copied_ent.dataFileHandleId != copied_ent.dataFileHandleId
        # Test that copying a specific version retrieves the correct file
        assert new_copied_URL.versionNumber == 1
        assert new_copied_URL.externalURL == repo_url
        assert new_copied_URL.dataFileHandleId != copied_URL_ent.dataFileHandleId
    finally:
        syn_other.logout()

    # ------------------------------------
    # TEST COPY LINKS
    # ------------------------------------
    print("Test: Copy Links")
    second_file = utils.make_bogus_data_file()
    # schedule_for_cleanup(filename)
    second_file_entity = syn.store(File(second_file, parent=project_entity))
    link_entity = Link(second_file_entity.id, parent=folder_entity.id)
    link_entity = syn.store(link_entity)

    # The function under test relies on queries that are eventually
    # consistent, so poll until the newly created entity becomes visible
    start_time = time.time()
    while syn.query("select id from entity where id=='%s'" %
                    link_entity.id).get('totalNumberOfResults') <= 0:
        assert_less(time.time() - start_time, QUERY_TIMEOUT_SEC)
        time.sleep(2)

    copied_link = synapseutils.copy(syn,
                                    link_entity.id,
                                    destinationId=second_folder.id)
    old = syn.get(link_entity.id, followLink=False)
    new = syn.get(copied_link[link_entity.id], followLink=False)
    assert old.linksTo['targetId'] == new.linksTo['targetId']
    assert old.linksTo['targetVersionNumber'] == new.linksTo[
        'targetVersionNumber']

    schedule_for_cleanup(second_file_entity.id)
    schedule_for_cleanup(link_entity.id)
    schedule_for_cleanup(copied_link[link_entity.id])

    time.sleep(3)

    assert_raises(ValueError,
                  synapseutils.copy,
                  syn,
                  link_entity.id,
                  destinationId=second_folder.id)

    # ------------------------------------
    # TEST COPY TABLE
    # ------------------------------------
    second_project = syn.store(Project(name=str(uuid.uuid4())))
    schedule_for_cleanup(second_project.id)
    print("Test: Copy Tables")
    cols = [
        Column(name='n', columnType='DOUBLE', maximumSize=50),
        Column(name='c', columnType='STRING', maximumSize=50),
        Column(name='i', columnType='INTEGER')
    ]
    data = [[2.1, 'foo', 10], [2.2, 'bar', 20], [2.3, 'baz', 30]]

    schema = syn.store(
        Schema(name='Testing', columns=cols, parent=project_entity.id))
    row_reference_set = syn.store(
        RowSet(columns=cols, schema=schema, rows=[Row(r) for r in data]))

    table_map = synapseutils.copy(syn,
                                  schema.id,
                                  destinationId=second_project.id)
    copied_table = syn.tableQuery('select * from %s' % table_map[schema.id])
    rows = copied_table.asRowSet()['rows']
    # TEST: Check if all values are the same
    for i, row in enumerate(rows):
        assert row['values'] == data[i]

    assert_raises(ValueError,
                  synapseutils.copy,
                  syn,
                  schema.id,
                  destinationId=second_project.id)

    schedule_for_cleanup(schema.id)
    schedule_for_cleanup(table_map[schema.id])

    # ------------------------------------
    # TEST COPY FOLDER
    # ------------------------------------
    print("Test: Copy Folder")
    mapping = synapseutils.copy(syn,
                                folder_entity.id,
                                destinationId=second_project.id)
    for i in mapping:
        old = syn.get(i, downloadFile=False)
        new = syn.get(mapping[i], downloadFile=False)
        assert old.name == new.name
        assert old.annotations == new.annotations
        assert old.concreteType == new.concreteType

    assert_raises(ValueError,
                  synapseutils.copy,
                  syn,
                  folder_entity.id,
                  destinationId=second_project.id)
    # TEST: Throw error if excludeTypes contains anything other than
    # "file", "link", or "table", or isn't a list
    assert_raises(ValueError,
                  synapseutils.copy,
                  syn,
                  second_folder.id,
                  destinationId=second_project.id,
                  excludeTypes=["foo"])
    assert_raises(ValueError,
                  synapseutils.copy,
                  syn,
                  second_folder.id,
                  destinationId=second_project.id,
                  excludeTypes="file")
    # TEST: excludeTypes=["file", "table", "link"], so only the folder is created
    second = synapseutils.copy(syn,
                               second_folder.id,
                               destinationId=second_project.id,
                               excludeTypes=["file", "table", "link"])

    copied_folder = syn.get(second[second_folder.id])
    assert copied_folder.name == second_folder.name
    assert len(second) == 1
    # TEST: Make sure an error is thrown if the folder name already exists
    start_time = time.time()
    while syn.query("select id from entity where id=='%s'" %
                    copied_folder.id).get('totalNumberOfResults') <= 0:
        assert_less(time.time() - start_time, QUERY_TIMEOUT_SEC)
        time.sleep(2)

    assert_raises(ValueError,
                  synapseutils.copy,
                  syn,
                  second_folder.id,
                  destinationId=second_project.id)

    # ------------------------------------
    # TEST COPY PROJECT
    # ------------------------------------
    print("Test: Copy Project")
    third_project = syn.store(Project(name=str(uuid.uuid4())))
    schedule_for_cleanup(third_project.id)

    mapping = synapseutils.copy(syn,
                                project_entity.id,
                                destinationId=third_project.id)
    for i in mapping:
        old = syn.get(i, downloadFile=False)
        new = syn.get(mapping[i], downloadFile=False)
        if not isinstance(old, Project):
            assert old.name == new.name
        assert old.annotations == new.annotations
        assert old.concreteType == new.concreteType

    # TEST: Can't copy project to a folder
    assert_raises(ValueError,
                  synapseutils.copy,
                  syn,
                  project_entity.id,
                  destinationId=second_folder.id)
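
# ----------------------------------------------------------------------
# A minimal sketch of the basic call exercised above, with hypothetical
# Synapse ids; copy() returns a dict mapping each source id to the id of
# its newly created copy.
# ----------------------------------------------------------------------
import synapseclient
import synapseutils

syn = synapseclient.login()  # assumes cached credentials

mapping = synapseutils.copy(syn, "syn123", destinationId="syn456")
print(mapping)  # e.g. {'syn123': 'syn789'}

# Annotations and provenance are copied by default; both can be
# suppressed, as the assertions above verify
synapseutils.copy(syn, "syn123", destinationId="syn457",
                  skipCopyAnnotations=True, setProvenance=None)
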
def test_copyWiki():
    # Create a Project
    project_entity = syn.store(Project(name=str(uuid.uuid4())))

    schedule_for_cleanup(project_entity.id)

    folder_entity = syn.store(Folder(name=str(uuid.uuid4()), parent=project_entity))
    schedule_for_cleanup(folder_entity.id)
    second_folder = syn.store(Folder(name=str(uuid.uuid4()), parent=project_entity))
    schedule_for_cleanup(second_folder.id)
    third_folder = syn.store(Folder(name=str(uuid.uuid4()), parent=project_entity))
    schedule_for_cleanup(third_folder.id)

    filename = utils.make_bogus_data_file()
    attachname = utils.make_bogus_data_file()

    schedule_for_cleanup(filename)
    file_entity = syn.store(File(filename, parent=folder_entity))
    nested_folder = syn.store(Folder(name=str(uuid.uuid4()), parent=folder_entity))
    second_file = syn.store(File(filename, parent=nested_folder))

    schedule_for_cleanup(file_entity.id)
    schedule_for_cleanup(nested_folder.id)
    schedule_for_cleanup(second_file.id)

    fileWiki = Wiki(owner=second_file, title='A Test Wiki', markdown="Test")
    fileWiki = syn.store(fileWiki)

    # Create mock wiki
    md = """
    This is a test wiki
    =======================

    Blabber jabber blah blah boo.
    %s
    %s
    """ %(file_entity.id,second_file.id)

    wiki = Wiki(owner=project_entity, title='A Test Wiki', markdown=md, 
                attachments=[attachname])
    wiki = syn.store(wiki)

    # Create a Wiki sub-page
    subwiki = Wiki(owner=project_entity, title='A sub-wiki', 
                   markdown='%s' % file_entity.id, parentWikiId=wiki.id)
    subwiki = syn.store(subwiki)

    second_md = """
    Testing internal links
    ======================

    [test](#!Synapse:%s/wiki/%s)

    %s)
    """ % (project_entity.id,subwiki.id, second_file.id)

    sub_subwiki = Wiki(owner=project_entity, title='A sub-sub-wiki', 
                   markdown=second_md, parentWikiId=subwiki.id,
                   attachments=[attachname])
    sub_subwiki = syn.store(sub_subwiki)

    # Copy wiki to second project
    second_project = syn.store(Project(name=str(uuid.uuid4())))
    schedule_for_cleanup(second_project.id)

    fileMapping = synu.copy(syn, project_entity, second_project.id, copyWikiPage=False)

    print("Test: copyWikiPage = False")
    assert_raises(SynapseHTTPError, syn.getWiki, second_project.id)

    first_headers = syn.getWikiHeaders(project_entity)
    second_headers = synu.copyWiki(syn, project_entity.id, second_project.id, entityMap=fileMapping)

    mapping = dict()

    print("Test: Check that all wikis were copied correctly with the correct mapping")
    for index, info in enumerate(second_headers):
        mapping[first_headers[index]['id']] = info['id']
        assert first_headers[index]['title'] == info['title']
        if info.get('parentId', None) is not None:
            # Check that parent ids map correctly in the copied wikis
            assert info['parentId'] == mapping[first_headers[index]['parentId']]

    print("Test: Check that all wikis have the correct attachments and have correct internal synapse link/file mapping")
    for index, info in enumerate(second_headers):
        # Check that the markdown is correctly mapped
        orig_wikiPage = syn.getWiki(project_entity, first_headers[index]['id'])
        new_wikiPage = syn.getWiki(second_project, info['id'])
        s = orig_wikiPage.markdown
        for oldWikiId in mapping.keys():
            oldProjectAndWikiId = "%s/wiki/%s" % (project_entity.id, oldWikiId)
            newProjectAndWikiId = "%s/wiki/%s" % (second_project.id, mapping[oldWikiId])
            s = re.sub(oldProjectAndWikiId, newProjectAndWikiId, s)
        for oldFileId in fileMapping.keys():
            s = re.sub(oldFileId, fileMapping[oldFileId], s)
        assert s == new_wikiPage.markdown
        orig_attach = syn.getWikiAttachments(orig_wikiPage)
        new_attach = syn.getWikiAttachments(new_wikiPage)
        # Check that the attachment file names are the same
        assert orig_attach == new_attach

    print("Test: copyWikiPage = True (Default) (Should copy all wikis including wikis on files)")
    third_project = syn.store(Project(name=str(uuid.uuid4())))
    schedule_for_cleanup(third_project.id)

    copiedFile = synu.copy(syn, second_file, third_project.id)
    copiedWiki = syn.getWiki(copiedFile[second_file.id])
    assert copiedWiki.title == fileWiki.title
    assert copiedWiki.markdown == fileWiki.markdown

    print("Test: entitySubPageId")
    third_header = synu.copyWiki(syn, project_entity.id, third_project.id,
                                 entitySubPageId=sub_subwiki.id,
                                 destinationSubPageId=None, updateLinks=False,
                                 updateSynIds=False, entityMap=fileMapping)
    test_ent_subpage = syn.getWiki(third_project.id, third_header[0]['id'])

    print("Test: No internal links updated")
    assert test_ent_subpage.markdown == sub_subwiki.markdown
    assert test_ent_subpage.title == sub_subwiki.title

    print("Test: destinationSubPageId")
    fourth_header = synu.copyWiki(syn, project_entity.id, third_project.id,
                                  entitySubPageId=subwiki.id,
                                  destinationSubPageId=test_ent_subpage.id,
                                  updateLinks=False, updateSynIds=False,
                                  entityMap=fileMapping)
    temp = syn.getWiki(third_project.id, fourth_header[0]['id'])
    # Some copied title pages come back blank; this is a known issue that
    # still needs to be addressed
    # assert temp.title == subwiki.title
    assert temp.markdown == subwiki.markdown

    temp = syn.getWiki(third_project.id, fourth_header[1]['id'])
    assert temp.title == sub_subwiki.title
    assert temp.markdown == sub_subwiki.markdown
    assert fourth_header[0] == third_header[0]
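
# ----------------------------------------------------------------------
# A minimal sketch of the two-step pattern tested above, with
# hypothetical Synapse ids: copy the entities first, then copy the wiki
# hierarchy, re-pointing internal links and ids at the copies.
# ----------------------------------------------------------------------
import synapseclient
import synapseutils

syn = synapseclient.login()  # assumes cached credentials

entity_map = synapseutils.copy(syn, "syn123", "syn456", copyWikiPage=False)
headers = synapseutils.copyWiki(syn, "syn123", "syn456",
                                updateLinks=True, updateSynIds=True,
                                entityMap=entity_map)
print([header['title'] for header in headers])
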
def test_copy():
    """Tests the copy function"""
    # Create a Project
    project_entity = syn.store(Project(name=str(uuid.uuid4())))
    schedule_for_cleanup(project_entity.id)
    # Create two Folders in Project
    folder_entity = syn.store(Folder(name=str(uuid.uuid4()), parent=project_entity))
    second_folder = syn.store(Folder(name=str(uuid.uuid4()), parent=project_entity))
    third_folder = syn.store(Folder(name=str(uuid.uuid4()), parent=project_entity))
    schedule_for_cleanup(folder_entity.id)
    schedule_for_cleanup(second_folder.id)
    schedule_for_cleanup(third_folder.id)

    # Annotations and provenance
    repo_url = 'https://github.com/Sage-Bionetworks/synapsePythonClient'
    annos = {'test': ['hello_world']}
    prov = Activity(name="test", used=repo_url)
    # Create, upload, and set annotations/provenance on a file in Folder
    filename = utils.make_bogus_data_file()
    schedule_for_cleanup(filename)
    file_entity = syn.store(File(filename, parent=folder_entity))
    externalURL_entity = syn.store(File(repo_url, name='rand', parent=folder_entity, synapseStore=False))
    syn.setAnnotations(file_entity, annos)
    syn.setAnnotations(externalURL_entity, annos)
    syn.setProvenance(externalURL_entity.id, prov)
    schedule_for_cleanup(file_entity.id)
    schedule_for_cleanup(externalURL_entity.id)
    # ------------------------------------
    # TEST COPY FILE
    # ------------------------------------
    output = synapseutils.copy(syn, file_entity.id, destinationId=project_entity.id)
    output_URL = synapseutils.copy(syn, externalURL_entity.id, destinationId=project_entity.id,
                                   skipCopyAnnotations=True)

    # Verify that our copied files are identical
    copied_ent = syn.get(output[file_entity.id])
    copied_URL_ent = syn.get(output_URL[externalURL_entity.id], downloadFile=False)

    copied_ent_annot = syn.getAnnotations(copied_ent)
    copied_url_annot = syn.getAnnotations(copied_URL_ent)
    copied_prov = syn.getProvenance(copied_ent)
    copied_url_prov = syn.getProvenance(copied_URL_ent)
    schedule_for_cleanup(copied_ent.id)
    schedule_for_cleanup(copied_URL_ent.id)

    # TEST: setProvenance = Traceback
    assert_equals(copied_prov['used'][0]['reference']['targetId'], file_entity.id)
    assert_equals(copied_url_prov['used'][0]['reference']['targetId'], externalURL_entity.id)

    # TEST: Make sure copied files are the same
    assert_equals(copied_ent_annot, annos)
    assert_equals(copied_ent.dataFileHandleId, file_entity.dataFileHandleId)

    # TEST: Make sure copied URLs are the same
    assert_equals(copied_url_annot, {})
    assert_equals(copied_URL_ent.externalURL, repo_url)
    assert_equals(copied_URL_ent.name, 'rand')
    assert_equals(copied_URL_ent.dataFileHandleId, externalURL_entity.dataFileHandleId)

    # TEST: Throw error if file is copied to a folder/project that has a file with the same filename
    assert_raises(ValueError, synapseutils.copy, syn, project_entity.id, destinationId=project_entity.id)
    assert_raises(ValueError, synapseutils.copy, syn, file_entity.id, destinationId=project_entity.id)
    assert_raises(ValueError, synapseutils.copy, syn, file_entity.id, destinationId=third_folder.id,
                  setProvenance="gib")
    assert_raises(ValueError, synapseutils.copy, syn, file_entity.id, destinationId=file_entity.id)

    # Test: setProvenance = None
    output = synapseutils.copy(syn, file_entity.id, destinationId=second_folder.id, setProvenance=None)
    assert_raises(SynapseHTTPError, syn.getProvenance, output[file_entity.id])
    schedule_for_cleanup(output[file_entity.id])

    # Test: setProvenance = Existing
    output_URL = synapseutils.copy(syn, externalURL_entity.id, destinationId=second_folder.id, setProvenance="existing")
    output_prov = syn.getProvenance(output_URL[externalURL_entity.id])
    schedule_for_cleanup(output_URL[externalURL_entity.id])
    assert_equals(output_prov['name'], prov['name'])
    assert_equals(output_prov['used'], prov['used'])

    # ------------------------------------
    # TEST COPY LINKS
    # ------------------------------------
    second_file = utils.make_bogus_data_file()
    # schedule_for_cleanup(filename)
    second_file_entity = syn.store(File(second_file, parent=project_entity))
    link_entity = Link(second_file_entity.id, parent=folder_entity.id)
    link_entity = syn.store(link_entity)

    copied_link = synapseutils.copy(syn, link_entity.id, destinationId=second_folder.id)
    old = syn.get(link_entity.id, followLink=False)
    new = syn.get(copied_link[link_entity.id], followLink=False)
    assert_equals(old.linksTo['targetId'], new.linksTo['targetId'])

    schedule_for_cleanup(second_file_entity.id)
    schedule_for_cleanup(link_entity.id)
    schedule_for_cleanup(copied_link[link_entity.id])

    time.sleep(3)

    assert_raises(ValueError, synapseutils.copy, syn, link_entity.id, destinationId=second_folder.id)

    # ------------------------------------
    # TEST COPY TABLE
    # ------------------------------------
    second_project = syn.store(Project(name=str(uuid.uuid4())))
    schedule_for_cleanup(second_project.id)
    cols = [Column(name='n', columnType='DOUBLE', maximumSize=50),
            Column(name='c', columnType='STRING', maximumSize=50),
            Column(name='i', columnType='INTEGER')]
    data = [[2.1, 'foo', 10],
            [2.2, 'bar', 20],
            [2.3, 'baz', 30]]

    schema = syn.store(Schema(name='Testing', columns=cols, parent=project_entity.id))
    syn.store(RowSet(schema=schema, rows=[Row(r) for r in data]))

    table_map = synapseutils.copy(syn, schema.id, destinationId=second_project.id)
    copied_table = syn.tableQuery('select * from %s' % table_map[schema.id])
    rows = copied_table.asRowSet()['rows']
    # TEST: Check if all values are the same
    for i, row in enumerate(rows):
        assert_equals(row['values'], data[i])

    assert_raises(ValueError, synapseutils.copy, syn, schema.id, destinationId=second_project.id)

    schedule_for_cleanup(schema.id)
    schedule_for_cleanup(table_map[schema.id])

    # ------------------------------------
    # TEST COPY FOLDER
    # ------------------------------------
    mapping = synapseutils.copy(syn, folder_entity.id, destinationId=second_project.id)
    for i in mapping:
        old = syn.get(i, downloadFile=False)
        new = syn.get(mapping[i], downloadFile=False)
        assert_equals(old.name, new.name)
        assert_equals(old.annotations, new.annotations)
        assert_equals(old.concreteType, new.concreteType)

    assert_raises(ValueError, synapseutils.copy, syn, folder_entity.id, destinationId=second_project.id)
    # TEST: Throw error if excludeTypes contains anything other than
    # "file", "link", or "table", or isn't a list
    assert_raises(ValueError, synapseutils.copy, syn, second_folder.id, destinationId=second_project.id,
                  excludeTypes=["foo"])
    assert_raises(ValueError, synapseutils.copy, syn, second_folder.id, destinationId=second_project.id,
                  excludeTypes="file")
    # TEST: excludeTypes=["file", "table", "link"], so only the folder is created
    second = synapseutils.copy(syn, second_folder.id, destinationId=second_project.id,
                               excludeTypes=["file", "table", "link"])

    copied_folder = syn.get(second[second_folder.id])
    assert_equals(copied_folder.name, second_folder.name)
    assert_equals(len(second), 1)
    # TEST: Make sure an error is thrown if the folder name already exists
    assert_raises(ValueError, synapseutils.copy, syn, second_folder.id, destinationId=second_project.id)

    # ------------------------------------
    # TEST COPY PROJECT
    # ------------------------------------
    third_project = syn.store(Project(name=str(uuid.uuid4())))
    schedule_for_cleanup(third_project.id)

    mapping = synapseutils.copy(syn, project_entity.id, destinationId=third_project.id)
    for i in mapping:
        old = syn.get(i, downloadFile=False)
        new = syn.get(mapping[i], downloadFile=False)
        if not isinstance(old, Project):
            assert_equals(old.name, new.name)
        assert_equals(old.annotations, new.annotations)
        assert_equals(old.concreteType, new.concreteType)

    # TEST: Can't copy project to a folder
    assert_raises(ValueError, synapseutils.copy, syn, project_entity.id, destinationId=second_folder.id)
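
# ----------------------------------------------------------------------
# A minimal sketch of the excludeTypes and setProvenance options
# exercised above, with hypothetical Synapse ids.
# ----------------------------------------------------------------------
import synapseclient
import synapseutils

syn = synapseclient.login()  # assumes cached credentials

# Recreate only the folder hierarchy; skip files, tables, and links
synapseutils.copy(syn, "syn123", destinationId="syn456",
                  excludeTypes=["file", "table", "link"])

# setProvenance accepts "traceback" (the default), "existing", or None
synapseutils.copy(syn, "syn124", destinationId="syn456",
                  setProvenance="existing")
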
def test_copy():
    """Tests the copy function"""

    # Create a Project
    project_entity = syn.store(Project(name=str(uuid.uuid4())))
    schedule_for_cleanup(project_entity.id)
    acl = syn.setPermissions(project_entity, other_user['principalId'], accessType=['READ', 'CREATE', 'UPDATE'])
    # Create two Folders in Project
    folder_entity = syn.store(Folder(name=str(uuid.uuid4()), parent=project_entity))
    second_folder = syn.store(Folder(name=str(uuid.uuid4()), parent=project_entity))
    third_folder = syn.store(Folder(name=str(uuid.uuid4()), parent=project_entity))
    schedule_for_cleanup(folder_entity.id)
    schedule_for_cleanup(second_folder.id)
    schedule_for_cleanup(third_folder.id)

    # Annotations and provenance
    repo_url = 'https://github.com/Sage-Bionetworks/synapsePythonClient'
    annots = {'test': ['hello_world']}
    prov = Activity(name="test", used=repo_url)
    # Create, upload, and set annotations/provenance on a file in Folder
    filename = utils.make_bogus_data_file()
    schedule_for_cleanup(filename)
    file_entity = syn.store(File(filename, parent=folder_entity))
    externalURL_entity = syn.store(File(repo_url, name='rand', parent=folder_entity, synapseStore=False))
    syn.setAnnotations(file_entity, annots)
    syn.setAnnotations(externalURL_entity, annots)
    syn.setProvenance(externalURL_entity.id, prov)
    schedule_for_cleanup(file_entity.id)
    schedule_for_cleanup(externalURL_entity.id)
    # ------------------------------------
    # TEST COPY FILE
    # ------------------------------------
    output = synu.copy(syn, file_entity.id, destinationId=project_entity.id)
    output_URL = synu.copy(syn, externalURL_entity.id, destinationId=project_entity.id)

    # Verify that our copied files are identical
    copied_ent = syn.get(output[file_entity.id])
    copied_URL_ent = syn.get(output_URL[externalURL_entity.id], downloadFile=False)

    copied_ent_annot = syn.getAnnotations(copied_ent)
    copied_url_annot = syn.getAnnotations(copied_URL_ent)
    copied_prov = syn.getProvenance(copied_ent)
    copied_url_prov = syn.getProvenance(copied_URL_ent)
    schedule_for_cleanup(copied_ent.id)
    schedule_for_cleanup(copied_URL_ent.id)

    # TEST: setProvenance = Traceback
    print("Test: setProvenance = Traceback")
    assert copied_prov['used'][0]['reference']['targetId'] == file_entity.id
    assert copied_url_prov['used'][0]['reference']['targetId'] == externalURL_entity.id

    # TEST: Make sure copied files are the same
    assert copied_ent_annot == annots
    assert copied_ent.dataFileHandleId == file_entity.dataFileHandleId

    # TEST: Make sure copied URLs are the same
    assert copied_url_annot == annots
    assert copied_URL_ent.externalURL == repo_url
    assert copied_URL_ent.name == 'rand'
    assert copied_URL_ent.dataFileHandleId == externalURL_entity.dataFileHandleId

    # TEST: Throw error if file is copied to a folder/project that has a file with the same filename
    assert_raises(ValueError, synu.copy, syn, project_entity.id, destinationId=project_entity.id)
    assert_raises(ValueError, synu.copy, syn, file_entity.id, destinationId=project_entity.id)
    assert_raises(ValueError, synu.copy, syn, file_entity.id, destinationId=third_folder.id, setProvenance="gib")

    print("Test: setProvenance = None")
    output = synu.copy(syn, file_entity.id, destinationId=second_folder.id, setProvenance=None)
    assert_raises(SynapseHTTPError, syn.getProvenance, output[file_entity.id])
    schedule_for_cleanup(output[file_entity.id])

    print("Test: setProvenance = Existing")
    output_URL = synu.copy(syn, externalURL_entity.id, destinationId=second_folder.id, setProvenance="existing")
    output_prov = syn.getProvenance(output_URL[externalURL_entity.id])
    schedule_for_cleanup(output_URL[externalURL_entity.id])
    assert output_prov['name'] == prov['name']
    assert output_prov['used'] == prov['used']

    if 'username' not in other_user or 'password' not in other_user:
        sys.stderr.write('\nWarning: no test authentication configured. '
                         'Skipping the test that copies a file made by another user.\n')
        return

    try:
        print("Test: Other user copy should result in different data file handle")
        syn_other = synapseclient.Synapse(skip_checks=True)
        syn_other.login(other_user['username'], other_user['password'])

        output = synu.copy(syn_other, file_entity.id, destinationId=third_folder.id)
        new_copied_ent = syn.get(output[file_entity.id])
        new_copied_ent_annot = syn.getAnnotations(new_copied_ent)
        schedule_for_cleanup(new_copied_ent.id)

        copied_URL_ent.externalURL = "https://www.google.com"
        copied_URL_ent = syn.store(copied_URL_ent)
        output = synu.copy(syn_other, copied_URL_ent.id, destinationId=third_folder.id, version=1)
        new_copied_URL = syn.get(output[copied_URL_ent.id], downloadFile=False)
        schedule_for_cleanup(new_copied_URL.id)

        assert new_copied_ent_annot == annots
        assert new_copied_ent.dataFileHandleId != copied_ent.dataFileHandleId
        # Test that copying a specific version retrieves the correct file
        assert new_copied_URL.versionNumber == 1
        assert new_copied_URL.externalURL == repo_url
        assert new_copied_URL.dataFileHandleId != copied_URL_ent.dataFileHandleId
    finally:
        syn_other.logout()
    # ------------------------------------
    # TEST COPY LINKS
    # ------------------------------------
    print("Test: Copy Links")
    second_file = utils.make_bogus_data_file()
    # schedule_for_cleanup(filename)
    second_file_entity = syn.store(File(second_file, parent=project_entity))
    link_entity = Link(second_file_entity.id, parent=folder_entity.id)
    link_entity = syn.store(link_entity)

    copied_link = synu.copy(syn, link_entity.id, destinationId=second_folder.id)
    old = syn.get(link_entity.id, followLink=False)
    new = syn.get(copied_link[link_entity.id], followLink=False)
    assert old.linksTo['targetId'] == new.linksTo['targetId']
    assert old.linksTo['targetVersionNumber'] == new.linksTo['targetVersionNumber']
    schedule_for_cleanup(second_file_entity.id)
    schedule_for_cleanup(link_entity.id)
    schedule_for_cleanup(copied_link[link_entity.id])

    assert_raises(ValueError, synu.copy, syn, link_entity.id, destinationId=second_folder.id)


    # ------------------------------------
    # TEST COPY TABLE
    # ------------------------------------
    second_project = syn.store(Project(name=str(uuid.uuid4())))
    schedule_for_cleanup(second_project.id)
    print("Test: Copy Tables")
    cols = [Column(name='n', columnType='DOUBLE', maximumSize=50),
            Column(name='c', columnType='STRING', maximumSize=50),
            Column(name='i', columnType='INTEGER')]
    data = [[2.1, 'foo', 10],
            [2.2, 'bar', 20],
            [2.3, 'baz', 30]]

    schema = syn.store(Schema(name='Testing', columns=cols, parent=project_entity.id))
    row_reference_set = syn.store(RowSet(columns=cols, schema=schema, rows=[Row(r) for r in data]))

    table_map = synu.copy(syn, schema.id, destinationId=second_project.id)
    copied_table = syn.tableQuery('select * from %s' % table_map[schema.id])
    rows = copied_table.asRowSet()['rows']
    # TEST: Check if all values are the same
    for i, row in enumerate(rows):
        assert row['values'] == data[i]

    assert_raises(ValueError, synu.copy, syn, schema.id, destinationId=second_project.id)

    schedule_for_cleanup(schema.id)
    schedule_for_cleanup(table_map[schema.id])

    # ------------------------------------
    # TEST COPY FOLDER
    # ------------------------------------
    print("Test: Copy Folder")
    mapping = synu.copy(syn, folder_entity.id, destinationId=second_project.id)
    for i in mapping:
        old = syn.get(i, downloadFile=False)
        new = syn.get(mapping[i], downloadFile=False)
        assert old.name == new.name
        assert old.annotations == new.annotations
        assert old.concreteType == new.concreteType

    assert_raises(ValueError, synu.copy, syn, folder_entity.id, destinationId=second_project.id)
    # TEST: Throw error if excludeTypes contains anything other than
    # "file", "link", or "table", or isn't a list
    assert_raises(ValueError, synu.copy, syn, second_folder.id, excludeTypes=["foo"])
    assert_raises(ValueError, synu.copy, syn, second_folder.id, excludeTypes="file")
    # TEST: excludeTypes=["file", "table", "link"], so only the folder is created
    second = synu.copy(syn, second_folder.id, destinationId=second_project.id, excludeTypes=["file", "table", "link"])
    copied_folder = syn.get(second[second_folder.id])
    assert copied_folder.name == second_folder.name
    assert len(second) == 1
    # TEST: Make sure an error is thrown if the folder name already exists
    assert_raises(ValueError, synu.copy, syn, second_folder.id, destinationId=second_project.id)

    # ------------------------------------
    # TEST COPY PROJECT
    # ------------------------------------
    print("Test: Copy Project")
    third_project = syn.store(Project(name=str(uuid.uuid4())))
    schedule_for_cleanup(third_project.id)

    mapping = synu.copy(syn, project_entity.id, destinationId=third_project.id)
    for i in mapping:
        old = syn.get(i, downloadFile=False)
        new = syn.get(mapping[i], downloadFile=False)
        if not isinstance(old, Project):
            assert old.name == new.name
        assert old.annotations == new.annotations
        assert old.concreteType == new.concreteType

    # TEST: Can't copy project to a folder
    assert_raises(ValueError, synu.copy, syn, project_entity.id, destinationId=second_folder.id)
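
# ----------------------------------------------------------------------
# A minimal sketch of the table-copy check above, with hypothetical
# Synapse ids: copying a Schema copies its columns and rows, which can
# be verified by querying the new table.
# ----------------------------------------------------------------------
import synapseclient
import synapseutils

syn = synapseclient.login()  # assumes cached credentials

table_map = synapseutils.copy(syn, "syn123", destinationId="syn456")
results = syn.tableQuery("select * from %s" % table_map["syn123"])
for row in results:
    print(row)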