Example #1
def createLinkVersion(syn, genie_version, caseListEntities, genePanelEntities, databaseSynIdMappingDf):
    versioning = genie_version.split(".")
    logger.info(genie_version)
    main = versioning[0]
    releaseSynId = databaseSynIdMappingDf['Id'][databaseSynIdMappingDf['Database'] == 'release'].values[0]
    publicSynId = databaseSynIdMappingDf['Id'][databaseSynIdMappingDf['Database'] == 'public'].values[0]
    #second = ".".join(versioning[1:])
    releases = synapseutils.walk(syn, releaseSynId)
    mainReleaseFolders = next(releases)[1]
    releaseFolderSynId = [synId for folderName, synId in mainReleaseFolders if folderName == "Release %s" % main] 
    if len(releaseFolderSynId) > 0:
        secondRelease = synapseutils.walk(syn, releaseFolderSynId[0])
        secondReleaseFolders = next(secondRelease)[1]
        secondReleaseFolderSynIdList = [synId for folderName, synId in secondReleaseFolders if folderName == genie_version] 
        if len(secondReleaseFolderSynIdList) > 0:
            secondReleaseFolderSynId = secondReleaseFolderSynIdList[0]
        else:
            secondReleaseFolderSynId = syn.store(synapseclient.Folder(genie_version, parent = releaseFolderSynId[0])).id
    else:
        mainReleaseFolderId = syn.store(synapseclient.Folder("Release %s" % main, parent = releaseSynId)).id
        secondReleaseFolderSynId = syn.store(synapseclient.Folder(genie_version, parent = mainReleaseFolderId)).id

    caselistId = db_to_staging.find_caselistid(syn, secondReleaseFolderSynId)

    publicRelease = syn.getChildren(publicSynId)
    for ents in publicRelease:
        if (ents['type'] != "org.sagebionetworks.repo.model.Folder"
                and ents['name'] != "data_clinical.txt"
                and not ents['name'].startswith("data_gene_panel")):
            syn.store(synapseclient.Link(ents['id'],
                                         parent=secondReleaseFolderSynId,
                                         targetVersion=ents['versionNumber']))
    for ents in caseListEntities:
        syn.store(synapseclient.Link(ents.id, parent=caselistId,
                                     targetVersion=ents.versionNumber))
    # Store gene panels
    for ents in genePanelEntities:
        syn.store(synapseclient.Link(ents.id, parent=secondReleaseFolderSynId,
                                     targetVersion=ents.versionNumber))
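For context, a hypothetical sketch of the databaseSynIdMappingDf argument (not from the source; the IDs are placeholders). The function only relies on the 'release' and 'public' rows it looks up above:

import pandas as pd

# Placeholder Synapse IDs; 'Database' and 'Id' are the two columns
# that createLinkVersion indexes into.
databaseSynIdMappingDf = pd.DataFrame({
    'Database': ['release', 'public'],
    'Id': ['syn111', 'syn222'],
})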
Example #2
def test_data_pull_non_data_folder(syn_test_helper, mk_tempfile,
                                   mk_uniq_string, mk_kiproject):
    syn_project = syn_test_helper.create_project()

    syn_folder1 = syn_test_helper.client().store(
        synapseclient.Folder(name='Folder1', parent=syn_project))
    syn_test_helper.client().store(
        synapseclient.File(path=mk_tempfile(), parent=syn_folder1))

    syn_folder2 = syn_test_helper.client().store(
        synapseclient.Folder(name='Folder2', parent=syn_folder1))
    syn_test_helper.client().store(
        synapseclient.File(path=mk_tempfile(), parent=syn_folder2))

    syn_folder3 = syn_test_helper.client().store(
        synapseclient.Folder(name='Folder3', parent=syn_folder2))
    syn_test_helper.client().store(
        synapseclient.File(path=mk_tempfile(), parent=syn_folder3))

    syn_folder4 = syn_test_helper.client().store(
        synapseclient.Folder(name='Folder4', parent=syn_folder3))
    syn_test_helper.client().store(
        synapseclient.File(path=mk_tempfile(), parent=syn_folder4))

    syn_folder5 = syn_test_helper.client().store(
        synapseclient.Folder(name='Folder5', parent=syn_folder4))
    syn_test_helper.client().store(
        synapseclient.File(path=mk_tempfile(), parent=syn_folder5))

    kiproject = mk_kiproject()
    kiproject.data_add(DataUri('syn', syn_folder1.id).uri,
                       data_type=kiproject.data_types[0])
    kiproject.data_pull()
Example #3
def create_folders(root, folder_list):
    """Create hierarchy of Synapse folders.

    Args:
        root: Synapse ID of a container.
        folder_list: list of folders in the same format as os.walk.
    Returns:
        A dictionary mapping the local folder to the created
        Synapse folder ID.
    """

    syn = Synapse().client()

    dirlookup = {'.': root}

    for directory, subdirectories, _ in folder_list:
        folder = dirlookup.get(directory)
        if not folder:
            # Parent under the enclosing directory's folder (looking up
            # `directory` itself here would KeyError, since it is unknown).
            folder = synapseclient.Folder(
                os.path.basename(directory),
                parent=dirlookup[os.path.dirname(directory)])
            folder = syn.store(folder)
        dirlookup[directory] = folder
        for subdir in subdirectories:
            curr = os.path.join(directory, subdir)
            # dict.get(key, default) evaluates the default eagerly, which
            # would store a folder even when one is already known, so check
            # membership explicitly instead.
            if curr not in dirlookup:
                dirlookup[curr] = syn.store(
                    synapseclient.Folder(subdir, parent=folder))

    return dirlookup
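A minimal usage sketch (not from the source; 'syn123' is a placeholder container ID and the client is assumed to be authenticated). os.walk yields exactly the (directory, subdirectories, files) triples the function expects, with '.' mapping to the root container:

import os

# Mirror the current directory's folder tree under the container.
lookup = create_folders('syn123', os.walk('.'))
# lookup maps each local path ('.', './raw', ...) to its Synapse folder.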
Example #4
def _getSynapseDir(syn, synapse_id, local_root, dir_list):
    """
    1. Walks through Synapse parent location hierarchy.
    2. update folders in Synapse to match the local dir,
    3. get key-value pairs of dirname and synapse id

    :param syn:
    :param synapse_id:
    :param local_root:
    :param dir_list:
    :return:
    """
    synapse_dir = {}
    synapse_root = syn.get(synapse_id)

    for (dirpath, dirpath_id), _, _ in synapseutils.walk(syn, synapse_id):
        dirpath = dirpath.replace(synapse_root.name,
                                  os.path.abspath(local_root))
        synapse_dir[dirpath] = dirpath_id

    for directory in dir_list:

        if directory not in synapse_dir:
            new_folder = synapseclient.Folder(
                os.path.basename(directory),
                synapse_dir[os.path.dirname(directory)])
            new_folder = syn.store(new_folder)
            synapse_dir[directory] = new_folder.id

    return synapse_dir
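A hypothetical invocation (paths and the 'syn123' ID are placeholders; syn is assumed to be a logged-in client). Note that parents must precede children in dir_list, since each new folder's parent is looked up in synapse_dir:

synapse_dir = _getSynapseDir(
    syn, 'syn123', '/data/project',
    ['/data/project/raw', '/data/project/raw/batch1'])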
Example #5
    def _find_or_create_syn_folder(self, syn_parent, folder_name):
        """Finds or creates a folder in Synapse.

        Args:
            syn_parent: The Synapse entity to find or create the folder under.
            folder_name: The name of the folder to find or create.

        Returns:
            synapseclient.Folder
        """

        # TODO: can any of this be cached?
        syn_entity_id = SynapseAdapter.client().findEntityId(folder_name,
                                                             parent=syn_parent)

        if syn_entity_id:
            syn_entity = SynapseAdapter.client().get(syn_entity_id)
            if self._is_folder(syn_entity):
                return syn_entity
            else:
                raise Exception(
                    'Cannot create folder, name: {0} already taken by another entity: {1}'
                    .format(folder_name, syn_entity.id))

        return SynapseAdapter.client().store(
            synapseclient.Folder(name=folder_name, parent=syn_parent))
Example #6
def test_get_or_create_folder__call():
    """Makes sure correct parameters are called"""
    folder_name = str(uuid.uuid1())
    parentid = str(uuid.uuid1())
    folder = synapseclient.Folder(name=folder_name,
                                  parentId=parentid)
    returned = synapseclient.Folder(name=folder_name,
                                    id=str(uuid.uuid1()),
                                    parentId=parentid)
    with patch.object(CREATE_CLS,
                      "_find_by_obj_or_create",
                      return_value=returned) as patch_find_or_create:
        new_folder = CREATE_CLS.get_or_create_folder(name=folder_name,
                                                     parentId=parentid)
        assert new_folder == returned
        patch_find_or_create.assert_called_once_with(folder)
Example #7
def test_valid__check_parentid_permission_container():
    """
    Test that parentid specified is a container and have permissions to access
    """
    parentid = "syn123"
    folder_ent = synapseclient.Folder("foo", parentId=parentid)
    with patch.object(syn, "get", return_value=folder_ent):
        validate._check_parentid_permission_container(syn, parentid)
Example #8
def test_command_copy():
    """Tests the 'synapse cp' function"""

    # Create a Project
    project_entity = syn.store(synapseclient.Project(name=str(uuid.uuid4())))
    schedule_for_cleanup(project_entity.id)

    # Create a Folder in Project
    folder_entity = syn.store(synapseclient.Folder(name=str(uuid.uuid4()),
                                                   parent=project_entity))
    schedule_for_cleanup(folder_entity.id)
    repo_url = 'https://github.com/Sage-Bionetworks/synapsePythonClient'
    annots = {'test': ['hello_world']}
    # Create, upload, and set annotations on a file in Folder
    filename = utils.make_bogus_data_file()
    schedule_for_cleanup(filename)
    file_entity = syn.store(synapseclient.File(filename, parent=folder_entity))
    externalURL_entity = syn.store(synapseclient.File(repo_url, name='rand', parent=folder_entity, synapseStore=False))
    syn.setAnnotations(file_entity, annots)
    syn.setAnnotations(externalURL_entity, annots)
    schedule_for_cleanup(file_entity.id)
    schedule_for_cleanup(externalURL_entity.id)

    # Test cp function
    output = run('synapse', '--skip-checks', 'cp', file_entity.id, '--destinationId', project_entity.id)
    output_URL = run('synapse', '--skip-checks', 'cp', externalURL_entity.id, '--destinationId', project_entity.id)

    copied_id = parse(r'Copied syn\d+ to (syn\d+)', output)
    copied_URL_id = parse(r'Copied syn\d+ to (syn\d+)', output_URL)

    # Verify that our copied files are identical
    copied_ent = syn.get(copied_id)
    copied_URL_ent = syn.get(copied_URL_id, downloadFile=False)
    schedule_for_cleanup(copied_id)
    schedule_for_cleanup(copied_URL_id)
    copied_ent_annot = syn.getAnnotations(copied_id)
    copied_url_annot = syn.getAnnotations(copied_URL_id)

    copied_prov = syn.getProvenance(copied_id)['used'][0]['reference']['targetId']
    copied_url_prov = syn.getProvenance(copied_URL_id)['used'][0]['reference']['targetId']

    # Make sure copied files are the same
    assert_equals(copied_prov, file_entity.id)
    assert_equals(copied_ent_annot, annots)
    assert_equals(copied_ent.properties.dataFileHandleId, file_entity.properties.dataFileHandleId)

    # Make sure copied URLs are the same
    assert_equals(copied_url_prov, externalURL_entity.id)
    assert_equals(copied_url_annot, annots)
    assert_equals(copied_URL_ent.externalURL, repo_url)
    assert_equals(copied_URL_ent.name, 'rand')
    assert_equals(copied_URL_ent.properties.dataFileHandleId, externalURL_entity.properties.dataFileHandleId)

    # Verify that errors are being thrown when a
    # file is copied to a folder/project that has a file with the same filename
    assert_raises(ValueError, run, 'synapse', '--debug', '--skip-checks', 'cp', file_entity.id,
                  '--destinationId', project_entity.id)
Example #9
    def _mk(syn_parent, count=2, suffix=''):
        syn_folders = []

        for folder_count in range(1, count + 1):
            folder_name = 'Folder{0}{1}'.format(folder_count, suffix)
            syn_folder = syn_test_helper.client().store(
                synapseclient.Folder(name=folder_name, parent=syn_parent))
            syn_folders.append(syn_folder)
        return syn_folders
Example #10
def test_table_file_view_csv_update_annotations__includeEntityEtag():
    folder = syn.store(
        synapseclient.Folder(name="updateAnnoFolder" + str(uuid.uuid4()),
                             parent=project))
    anno1_name = "annotationColumn1"
    anno2_name = "annotationColumn2"
    initial_annotations = {
        anno1_name: "initial_value1",
        anno2_name: "initial_value2"
    }
    file_entity = syn.store(
        File(name=
             "test_table_file_view_csv_update_annotations__includeEntityEtag",
             path="~/fakepath",
             synapseStore=False,
             parent=folder,
             annotations=initial_annotations))

    annotation_columns = [
        Column(name=anno1_name, columnType='STRING'),
        Column(name=anno2_name, columnType='STRING')
    ]
    entity_view = syn.store(
        EntityViewSchema(name="TestEntityViewSchemaUpdateAnnotation" +
                         str(uuid.uuid4()),
                         parent=project,
                         scopes=[folder],
                         columns=annotation_columns))

    query_str = "SELECT {anno1}, {anno2} FROM {proj_id}".format(
        anno1=anno1_name, anno2=anno2_name, proj_id=utils.id_of(entity_view))

    # Modify the first annotation using a rowset
    rowset_query_result = syn.tableQuery(query_str, resultsAs="rowset")
    rowset = rowset_query_result.asRowSet()
    rowset_changed_anno_value = "rowset_value_change"
    rowset.rows[0].values[0] = rowset_changed_anno_value
    syn.store(rowset)

    # Modify the second annotation using a CSV-backed query
    csv_query_result = syn.tableQuery(query_str, resultsAs="csv")
    dataframe = csv_query_result.asDataFrame()
    csv_changed_anno_value = "csv_value_change"
    # pandas removed DataFrame.ix; use positional .iloc instead
    dataframe.iloc[0, dataframe.columns.get_loc(anno2_name)] = \
        csv_changed_anno_value
    syn.store(Table(utils.id_of(entity_view), dataframe))

    # Check annotations on the file entity. Annotations may not update
    # immediately, so poll until they match or the query times out.
    expected_annotations = {
        anno1_name: [rowset_changed_anno_value],
        anno2_name: [csv_changed_anno_value]
    }
    start_time = time.time()
    while expected_annotations != file_entity.annotations:
        assert_less(time.time() - start_time, QUERY_TIMEOUT_SEC)
        time.sleep(2)
        file_entity = syn.get(file_entity, downloadFile=False)
Example #11
def test_command_get_recursive_and_query():
    """Tests the 'synapse get -r' and 'synapse get -q' functions"""
    # Create a Project
    project_entity = syn.store(synapseclient.Project(name=str(uuid.uuid4())))
    schedule_for_cleanup(project_entity.id)

    # Create a Folder in Project
    folder_entity = syn.store(
        synapseclient.Folder(name=str(uuid.uuid4()), parent=project_entity))

    # Create and upload two files in Folder
    uploaded_paths = []
    for i in range(2):
        f = utils.make_bogus_data_file()
        uploaded_paths.append(f)
        schedule_for_cleanup(f)
        file_entity = synapseclient.File(f, parent=folder_entity)
        file_entity.location = 'folder'
        file_entity = syn.store(file_entity)
    # Add a file at the project level as well
    f = utils.make_bogus_data_file()
    uploaded_paths.append(f)
    schedule_for_cleanup(f)
    file_entity = synapseclient.File(f, parent=project_entity)
    file_entity.location = 'project'
    file_entity = syn.store(file_entity)

    ### Test recursive get
    output = run('synapse', '--skip-checks', 'get', '-r', project_entity.id)
    #Verify that we downloaded files:
    new_paths = [
        os.path.join('.', folder_entity.name, os.path.basename(f))
        for f in uploaded_paths[:-1]
    ]
    new_paths.append(os.path.join('.', os.path.basename(uploaded_paths[-1])))
    schedule_for_cleanup(folder_entity.name)
    for downloaded, uploaded in zip(new_paths, uploaded_paths):
        print(uploaded, downloaded)
        assert os.path.exists(downloaded)
        assert filecmp.cmp(downloaded, uploaded)
    schedule_for_cleanup(new_paths[0])

    ### Test query get
    output = run(
        'synapse', '--skip-checks', 'get', '-q',
        "select id from file where parentId=='%s' and location=='folder'" %
        folder_entity.id)
    #Verify that we downloaded files:
    new_paths = [
        os.path.join('.', os.path.basename(f)) for f in uploaded_paths[:-1]
    ]
    for downloaded, uploaded in zip(new_paths, uploaded_paths[:-1]):
        print(uploaded, downloaded)
        assert os.path.exists(downloaded)
        assert filecmp.cmp(downloaded, uploaded)
        schedule_for_cleanup(downloaded)
Example #12
def _remote_folder(dirpath, remotes, syn):
    """Retrieve the remote folder for files, creating if necessary.
    """
    if dirpath in remotes:
        return remotes[dirpath], remotes
    else:
        parent_dir, cur_dir = os.path.split(dirpath)
        parent_folder, remotes = _remote_folder(parent_dir, remotes, syn)
        s_cur_dir = syn.store(synapseclient.Folder(cur_dir, parent=parent_folder))
        remotes[dirpath] = s_cur_dir.id
        return s_cur_dir.id, remotes
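A hypothetical usage sketch ('syn123' is a placeholder project ID): remotes must be pre-seeded with at least one ancestor of dirpath, otherwise the recursion never reaches a known folder:

remotes = {'/data': 'syn123'}
folder_id, remotes = _remote_folder('/data/batch1/samples', remotes, syn)
# Creates Folder('batch1') under syn123, then Folder('samples') under it.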
Example #13
def get_parent_folder(project_id, key):
    parent_id = project_id
    folders = key.split('/')
    fn = folders.pop(-1)
    
    for f in folders:
        folder_id = syn.findEntityId(f, parent_id)
        if folder_id is None:
            # create folder: 
            folder_id = syn.store(synapseclient.Folder(name=f, parent=parent_id), forceVersion=False)['id']
        parent_id = folder_id

    return parent_id
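A hypothetical call (placeholder project ID and S3-style key). The trailing filename is popped off and ignored; the intermediate folders are found or created along the way:

parent_id = get_parent_folder('syn123', 'raw/batch1/sample.csv')
# parent_id is now the Synapse ID of the 'batch1' folder.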
Example #14
def test_command_copy():
    """Tests the 'synapse cp' function"""
    # Create a Project
    project_entity = syn.store(synapseclient.Project(name=str(uuid.uuid4())))
    schedule_for_cleanup(project_entity.id)

    # Create a Folder in Project
    folder_entity = syn.store(
        synapseclient.Folder(name=str(uuid.uuid4()), parent=project_entity))
    # Create and upload a file in Folder
    dummy = utils.make_bogus_data_file()
    schedule_for_cleanup(dummy)
    dummy_entity = syn.store(synapseclient.File(dummy, parent=folder_entity))

    repo_url = 'https://github.com/Sage-Bionetworks/synapsePythonClient'
    annots = {'test': 'hello_world'}
    # Create, upload, and set annotations on a file in Folder
    filename = utils.make_bogus_data_file()
    schedule_for_cleanup(filename)
    file_entity = syn.store(synapseclient.File(filename, parent=folder_entity),
                            used=dummy_entity.id,
                            executed=repo_url)
    syn.setAnnotations(file_entity, annots)

    ### Test cp function
    output = run('synapse', '--skip-checks', 'cp', '--id', file_entity.id,
                 '--parentid', project_entity.id)

    copied_id = parse(r'Copied syn\d+ to (syn\d+)', output)
    #Verify that our copied files are identical
    copied_ent = syn.get(copied_id)
    schedule_for_cleanup(copied_id)
    copied_ent_annot = syn.getAnnotations(copied_ent)

    copied_annot = dict((key, copied_ent_annot[key].pop())
                        for key in copied_ent_annot
                        if key not in ('uri', 'id', 'creationDate', 'etag'))
    copied_prov = syn.getProvenance(
        copied_ent)['used'][0]['reference']['targetId']

    assert copied_prov == file_entity.id
    assert copied_annot == annots
    # Verify that errors are thrown when folders/projects are copied, or when
    # a file is copied to a folder/project that already has a file with the
    # same filename
    assert_raises(ValueError, run, 'synapse', '--debug', '--skip-checks', 'cp',
                  '--id', folder_entity.id, '--parentid', project_entity.id)
    assert_raises(ValueError, run, 'synapse', '--debug', '--skip-checks', 'cp',
                  '--id', project_entity.id, '--parentid', project_entity.id)
    assert_raises(ValueError, run, 'synapse', '--debug', '--skip-checks', 'cp',
                  '--id', file_entity.id, '--parentid', project_entity.id)
Example #15
def syn_data(mk_syn_project, syn_test_helper, mk_syn_folders_files):
    """
    Creates this:

    data
        /core
            file1
            file2
            folder1/
                file1_1
                file2_1
                Folder1_1/
                    file1_2
                    file2_2
                Folder2_1/
                    file1_2
                    file2_2
            folder2/
                file1_1
                file2_1
                Folder1_1/
                    file1_2
                    file2_2
                Folder2_1/
                    file1_2
                    file2_2
        /auxiliary
            <same as core...>
    /results
        <same as core...>

    This method returns the root files/folders under each data type folder
    (core, auxiliary, and results in the tree above).
    The data and data_type folders themselves are NOT returned.
    """
    syn_project = mk_syn_project()
    root_folders = []
    root_files = []

    for template_path in DataTypeTemplate.default().paths:
        parent = syn_project

        for name in SysPath(template_path.rel_path).rel_parts:
            parent = syn_test_helper.client().store(
                synapseclient.Folder(name=name, parent=parent))

        folder, folders, files = mk_syn_folders_files(parent)
        root_folders += folders
        root_files += files

    return syn_project, root_folders, root_files
Example #16
def test_entity_type_display_name():
    assert SynapseProxy.entity_type_display_name(
        'org.sagebionetworks.repo.model.Project'
    ) == SynapseProxy.PROJECT_TYPE_DISPLAY_NAME
    assert SynapseProxy.entity_type_display_name(
        syn.Project()) == SynapseProxy.PROJECT_TYPE_DISPLAY_NAME
    assert SynapseProxy.entity_type_display_name({
        'concreteType':
        'org.sagebionetworks.repo.model.Project'
    }) == SynapseProxy.PROJECT_TYPE_DISPLAY_NAME

    assert SynapseProxy.entity_type_display_name(
        'org.sagebionetworks.repo.model.Folder'
    ) == SynapseProxy.FOLDER_TYPE_DISPLAY_NAME
    assert SynapseProxy.entity_type_display_name(
        syn.Folder(parentId='syn0')) == SynapseProxy.FOLDER_TYPE_DISPLAY_NAME
    assert SynapseProxy.entity_type_display_name({
        'concreteType':
        'org.sagebionetworks.repo.model.Folder'
    }) == SynapseProxy.FOLDER_TYPE_DISPLAY_NAME

    assert SynapseProxy.entity_type_display_name(
        'org.sagebionetworks.repo.model.FileEntity'
    ) == SynapseProxy.FILE_TYPE_DISPLAY_NAME
    assert SynapseProxy.entity_type_display_name(
        syn.File(parentId='syn0')) == SynapseProxy.FILE_TYPE_DISPLAY_NAME
    assert SynapseProxy.entity_type_display_name({
        'concreteType':
        'org.sagebionetworks.repo.model.FileEntity'
    }) == SynapseProxy.FILE_TYPE_DISPLAY_NAME

    assert SynapseProxy.entity_type_display_name(
        'org.sagebionetworks.repo.model.Link'
    ) == SynapseProxy.LINK_TYPE_DISPLAY_NAME
    assert SynapseProxy.entity_type_display_name(
        syn.Link(parentId='syn0',
                 targetId='syn0')) == SynapseProxy.LINK_TYPE_DISPLAY_NAME
    assert SynapseProxy.entity_type_display_name({
        'concreteType':
        'org.sagebionetworks.repo.model.Link'
    }) == SynapseProxy.LINK_TYPE_DISPLAY_NAME

    assert SynapseProxy.entity_type_display_name(
        'org.sagebionetworks.repo.model.table.TableEntity'
    ) == SynapseProxy.TABLE_TYPE_DISPLAY_NAME
    assert SynapseProxy.entity_type_display_name({
        'concreteType':
        'org.sagebionetworks.repo.model.table.TableEntity'
    }) == SynapseProxy.TABLE_TYPE_DISPLAY_NAME
Example #17
def test_it_does_not_push_a_file_unless_the_local_file_changed(
        mk_kiproject, mk_syn_files, syn_client, mocker):
    kiproject = mk_kiproject()

    # Get the Synapse project for the KiProject
    syn_project = syn_client.get(DataUri.parse(kiproject.project_uri).id)

    syn_data_folder = syn_client.store(
        synapseclient.Folder(name='data', parent=syn_project))
    syn_core_folder = syn_client.store(
        synapseclient.Folder(name='core', parent=syn_data_folder))

    # Create a Synapse file to add/pull/push
    syn_file = mk_syn_files(syn_core_folder, file_num=1, versions=1,
                            suffix='')[0]

    syn_file_uri = DataUri('syn', syn_file.id).uri
    kiproject.data_add(syn_file_uri, data_type=kiproject.data_types[0])
    kiproject.data_pull()

    # The file exists in the Synapse project and has been pulled locally.
    # Pushing again should NOT upload the file again.
    mocker.spy(synapseclient.client, 'upload_file_handle')
    kiproject.data_push(syn_file_uri)
Example #18
    def test__create_synapse_resources_folder(self):
        """Test that folders get created"""
        folder_config = [
            {
                'name': 'Test 1',
                'type': 'Folder'
            },
            {
                'name': 'Test 2',
                'type': 'Folder'
            }
        ]
        expected_config = [
            {
                'name': 'Test 1',
                'type': 'Folder',
                'id': 'syn33333'
            },
            {
                'name': 'Test 2',
                'type': 'Folder',
                'id': 'syn22222'
            }
        ]
        folder_ent_1 = synapseclient.Folder(id="syn33333", parentId="syn5555")
        folder_ent_2 = synapseclient.Folder(id="syn22222", parentId="syn5555")
        call_1 = mock.call(name="Test 1", parentId="syn5555")
        call_2 = mock.call(name="Test 2", parentId="syn5555")
        with patch.object(self.create_cls, "get_or_create_folder",
                          side_effect=[folder_ent_1,
                                       folder_ent_2]) as patch_create:
            client._create_synapse_resources(config_list=folder_config,
                                             creation_cls=self.create_cls,
                                             parentid="syn5555")
            patch_create.assert_has_calls([call_1, call_2])
            assert folder_config == expected_config
Example #19
    def test__create_synapse_resources_recursive(self):
        """Test recursive calls are made"""
        project_ent = synapseclient.Project(id="syn5555")
        folder_ent = synapseclient.Folder(id="syn33333", parentId="syn5555")
        call_1 = mock.call(name="Genes", parentId="syn5555")
        call_2 = mock.call(name="testing", parentId="syn33333")
        with patch.object(self.create_cls, "get_or_create_project",
                          return_value=project_ent) as patch_create_proj,\
             patch.object(self.create_cls, "get_or_create_folder",
                          return_value=folder_ent) as patch_create_folder:
            client._create_synapse_resources(config_list=self.config,
                                             creation_cls=self.create_cls)
            patch_create_proj.assert_called_once_with(
                name="Test Configuration"
            )
            patch_create_folder.assert_has_calls([call_1, call_2])
Example #20
    def follow_path_to_folder(self, path, origin=None, create=False):
        """Return terminal folder's synID after traversing the defined path."""
        if origin is None:
            origin = self.project_id

        try:
            name = path.popleft()
        except AttributeError:
            path = deque(path)
            name = path.popleft()

        try:
            is_folder_named_x_partial = partial(is_folder_named_x, name=name)
            next_node_id = self.check_children(
                node_id=origin, func=is_folder_named_x_partial)[0]
        except IndexError:
            # If no child is found:
            if create:
                # create synapse folder object if we were told to
                parent_obj = self.node[origin].obj
                new_folder = synapse.Folder(name, parent=parent_obj)
                new_folder = self.syn.store(new_folder)
                new_folder_id = new_folder['id']

                # add new edge to DAG and mark for update
                self.add_edge(u=origin, v=new_folder_id, attr_dict=None)

                entity_dict = {k: v for k, v in new_folder.items()}
                self.node[new_folder_id] = SynNode(entity_dict=entity_dict,
                                                   synapse_session=self.syn,
                                                   is_root=False)

                # send the final result back up the chain.
                return new_folder_id

            else:
                # raise an error otherwise
                raise e.NoResult()

        # send next_node_id along to next level
        # or send the final result back up the chain.
        if path:
            return self.follow_path_to_folder(path=path,
                                              origin=next_node_id,
                                              create=create)
        else:
            return next_node_id
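A hypothetical call, where dag stands in for an instance of the surrounding class. Plain lists work too, since the method converts them to a deque on first use:

from collections import deque

# Traverse (and, with create=True, create) a/b/c under the project root.
folder_id = dag.follow_path_to_folder(deque(['a', 'b', 'c']), create=True)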
Example #21
def mk_local_files_and_folders(start_path,
                               prefix='',
                               depth=3,
                               file_count=3,
                               folder_count=3,
                               current_depth=0,
                               syn_client=None,
                               syn_parent=None):
    current_depth += 1

    local_results = []
    remote_results = []

    for _ in range(0, file_count):
        filename = '{0}test_file_{1}_{2}.dat'.format(prefix, current_depth, gen_id())
        file_path = os.path.join(start_path, filename)

        # Fill the file with random data.
        write_random_data_to_file(file_path)
        local_results.append(file_path)

        # Store the file in Synapse
        if syn_parent:
            syn_file = syn_client.store(syn.File(path=file_path, parent=syn_parent))
            remote_results.append(syn_file)

    if current_depth < depth:
        # Create the folders.
        for _ in range(0, folder_count):
            foldername = '{0}test_folder_{1}_{2}'.format(prefix, current_depth, gen_id())
            folder_path = mk_dirs(start_path, foldername)
            local_results.append(folder_path)
            # Create the folder in Synapse (leave it as None when running
            # locally so the recursive call below gets a well-defined parent)
            syn_folder = None
            if syn_parent:
                syn_folder = syn_client.store(syn.Folder(name=foldername, parent=syn_parent))
                remote_results.append(syn_folder)
            more_locals, more_remotes = mk_local_files_and_folders(folder_path,
                                                                   prefix=prefix,
                                                                   depth=depth,
                                                                   current_depth=current_depth,
                                                                   syn_client=syn_client,
                                                                   syn_parent=syn_folder)
            local_results += more_locals
            remote_results += more_remotes

    return local_results, remote_results
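A local-only usage sketch (assumes the fixture's helpers mk_dirs, write_random_data_to_file, and gen_id are importable; the path is a placeholder). With no syn_client or syn_parent, nothing is uploaded:

local_paths, remote_entities = mk_local_files_and_folders(
    '/tmp/fixture', depth=2, file_count=2, folder_count=2)
# remote_entities is empty; local_paths lists every file and folder created.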
Example #22
    def setUp(self):
        print("Creating private Project...")
        test_project = sc.Project("Test" + uuid.uuid4().hex)
        self.project_id = syn.store(test_project).id
        print("Creating Folder...")
        folder = sc.Folder("folder", parent=self.project_id)
        self.folder_id = syn.store(folder).id
        print("Creating File within Folder...")
        with tempfile.NamedTemporaryFile() as temp:
            # NamedTemporaryFile opens in binary mode by default
            temp.write(b"123testingfolder")
            temp.flush()
            temp_file = sc.File(temp.name, parent=self.folder_id)
            self.folder_fileId = syn.store(temp_file).id
        print("Creating File within Project...")
        with tempfile.NamedTemporaryFile() as temp:
            temp.write(b"123testingproject")
            temp.flush()
            temp_file = sc.File(temp.name, parent=self.project_id)
            self.project_fileId = syn.store(temp_file).id
Example #23
    def _create_folder_in_synapse(self, path, synapse_parent):
        synapse_folder = None

        if not synapse_parent:
            self.has_errors = True
            logging.error(
                'Parent not found, cannot create folder: {0}'.format(path))
            return synapse_folder

        folder_name = os.path.basename(path)
        full_synapse_path = self._get_synapse_path(folder_name, synapse_parent)

        max_attempts = 5
        attempt_number = 0
        exception = None

        while attempt_number < max_attempts and not synapse_folder:
            try:
                attempt_number += 1
                exception = None
                synapse_folder = self._synapse_client.store(syn.Folder(
                    name=folder_name, parent=synapse_parent),
                                                            forceVersion=False)
            except Exception as ex:
                exception = ex
                logging.error('[Folder ERROR] {0} -> {1} : {2}'.format(
                    path, full_synapse_path, str(ex)))
                if attempt_number < max_attempts:
                    sleep_time = random.randint(1, 5)
                    logging.info('[Folder RETRY in {0}s] {1} -> {2}'.format(
                        sleep_time, path, full_synapse_path))
                    time.sleep(sleep_time)

        if exception:
            self.has_errors = True
            logging.error('[Folder FAILED] {0} -> {1} : {2}'.format(
                path, full_synapse_path, str(exception)))
        else:
            logging.info('[Folder] {0} -> {1}'.format(path, full_synapse_path))
            self._set_synapse_parent(synapse_folder)

        return synapse_folder
Example #24
def main(syn):

    # Basic setup of the project
    project_name = "Testing Synapse Genie"

    # Determine the short and long names of the centers.
    center_abbreviations = ['AAA', 'BBB', 'CCC']
    center_names = center_abbreviations

    # Create the project
    project = synapseclient.Project(project_name)
    project = syn.store(project)

    # Create a folder for log files generated by the GENIE processes
    # of validation and updating the database tables
    logs_folder = synapseclient.Folder(name='Logs', parent=project)
    logs_folder = syn.store(logs_folder)

    # Folder for individual center folders
    root_center_folder = synapseclient.Folder(name='Centers', parent=project)
    root_center_folder = syn.store(root_center_folder)

    # The folders for each center where they will upload files for validation
    # and submission. There is one folder per center.
    # This currently deviates from the original GENIE setup of having an
    # 'Input' and 'Staging' folder for each center.
    center_folders = [
        synapseclient.Folder(name=name, parent=root_center_folder)
        for name in center_abbreviations
    ]
    center_folders = [syn.store(folder) for folder in center_folders]

    # Make some fake data that only contains basic text to check
    # for validation.

    n_files = 5  # number of files per center to create

    for folder in center_folders:
        for idx in range(n_files):
            tmp = tempfile.NamedTemporaryFile(prefix=f'TEST-{folder.name}',
                                              suffix='.txt')
            with open(tmp.name, mode='w') as fh:
                fh.write(random.choice(['ERROR', 'VALID', 'NOPE']))
            synfile = syn.store(synapseclient.File(tmp.name, parent=folder))

    # Set up the table that holds the validation status of all submitted files.
    status_schema = create_status_table(syn, project)

    # Set up the table that maps the center abbreviation to the folder where
    # their data is uploaded. This is used by the GENIE framework to find the
    # files to validate for a center.
    center_map_table_defs = [
        {
            'name': 'name',
            'columnType': 'STRING',
            'maximumSize': 250
        },
        {
            'name': 'center',
            'columnType': 'STRING',
            'maximumSize': 50
        },
        {
            'name': 'inputSynId',
            'columnType': 'ENTITYID'
        },
        # {'name': 'stagingSynId',
        #  'columnType': 'ENTITYID'},
        {
            'name': 'release',
            'defaultValue': 'false',
            'columnType': 'BOOLEAN'
        }
        # {'id': '68438',
        #  'name': 'mutationInCisFilter',
        #  'defaultValue': 'true',
        #  'columnType': 'BOOLEAN',
        #  'concreteType': 'org.sagebionetworks.repo.model.table.ColumnModel'}
    ]

    center_map_cols = [
        synapseclient.Column(**col) for col in center_map_table_defs
    ]

    center_schema = synapseclient.Schema(name='Center Table',
                                         columns=center_map_cols,
                                         parent=project)
    center_schema = syn.store(center_schema)

    # Add the center folders created above to this table.
    center_folder_ids = [folder.id for folder in center_folders]
    center_df = pandas.DataFrame(
        dict(name=center_names,
             center=center_abbreviations,
             inputSynId=center_folder_ids))

    tbl = synapseclient.Table(schema=center_schema, values=center_df)
    tbl = syn.store(tbl)

    # Create a table that stores the error logs for each submitted file.
    error_col_defs = [
        {
            'name': 'id',
            'columnType': 'ENTITYID'
        },
        {
            'name': 'center',
            'columnType': 'STRING',
            'maximumSize': 50,
            'facetType': 'enumeration'
        },
        {
            'name': 'errors',
            'columnType': 'LARGETEXT'
        },
        {
            'name': 'name',
            'columnType': 'STRING',
            'maximumSize': 500
        },
        # {'name': 'versionNumber',
        #  'columnType': 'STRING',
        #  'maximumSize': 50},
        {
            'name': 'fileType',
            'columnType': 'STRING',
            'maximumSize': 50
        }
    ]

    error_map_cols = [synapseclient.Column(**col) for col in error_col_defs]
    error_schema = synapseclient.Schema(name='Error Table',
                                        columns=error_map_cols,
                                        parent=project)
    error_schema = syn.store(error_schema)

    # Create a table that maps the various database tables to a short name.
    # This table is used in many GENIE functions to find the correct table to update
    # or get the state of something from.

    db_map_col_defs = [{
        'name': 'Database',
        'columnType': 'STRING',
        'maximumSize': 50
    }, {
        'name': 'Id',
        'columnType': 'ENTITYID'
    }]

    db_map_cols = [synapseclient.Column(**col) for col in db_map_col_defs]
    db_map_schema = synapseclient.Schema(name='DB Mapping Table',
                                         columns=db_map_cols,
                                         parent=project)
    db_map_schema = syn.store(db_map_schema)

    # Add the dbMapping annotation (the stored Schema exposes .id, which is
    # also how it is referenced in the mapping DataFrame below)
    project.annotations.dbMapping = db_map_schema.id
    project = syn.store(project)
    # Add the tables we already created to the mapping table.
    dbmap_df = pandas.DataFrame(
        dict(Database=[
            'centerMapping', 'validationStatus', 'errorTracker', 'dbMapping',
            'logs'
        ],
             Id=[
                 center_schema.id, status_schema.id, error_schema.id,
                 db_map_schema.id, logs_folder.id
             ]))

    db_map_tbl = synapseclient.Table(schema=db_map_schema, values=dbmap_df)
    db_map_tbl = syn.store(db_map_tbl)

    # Make a top level folder for output. Some processing for
    # file types copy a file from one place to another.
    output_folder = synapseclient.Folder(name='Output', parent=project)
    output_folder = syn.store(output_folder)

    output_folder_map = []

    # default_table_col_defs = status_table_col_defs = [
    #     {'name': 'PRIMARY_KEY',
    #      'columnType': 'STRING'}
    # ]
    # default_table_cols = [synapseclient.Column(**col)
    #                       for col in default_table_col_defs]

    default_primary_key = 'PRIMARY_KEY'

    # For each file type format in the format registry, create an output folder and a table.
    # Some GENIE file types copy a file to a new place, and some update a table. Having both
    # means that both of these operations will be available at the beginning.
    # The mapping between the file type and the folder or table have a consistent naming.
    # The key ('Database' value) is {file_type}_folder or {file_type}_table.
    # Determine which file formats are going to be used.
    format_registry = config.collect_format_types(['example_registry'])

    for file_type, obj in format_registry.items():
        file_type_folder = synapseclient.Folder(name=file_type,
                                                parent=output_folder)
        file_type_folder = syn.store(file_type_folder)
        output_folder_map.append(
            dict(Database=f"{file_type}_folder", Id=file_type_folder.id))

        file_type_schema = synapseclient.Schema(name=file_type, parent=project)
        file_type_schema.annotations.primaryKey = default_primary_key
        file_type_schema = syn.store(file_type_schema)

        output_folder_map.append(
            dict(Database=f"{file_type}_table", Id=file_type_schema.id))

    # Add the folders and tables created to the mapping table.
    db_map_tbl = synapseclient.Table(
        schema=db_map_schema, values=pandas.DataFrame(output_folder_map))
    db_map_tbl = syn.store(db_map_tbl)
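Downstream code reads this mapping back to locate resources, much as Example #1 does; a minimal sketch using the variables above:

# Query the mapping table into a DataFrame and look up one entry.
mapping = syn.tableQuery(f"select * from {db_map_schema.id}").asDataFrame()
logs_folder_id = mapping['Id'][mapping['Database'] == 'logs'].values[0]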
Example #25
from unittest.mock import create_autospec

import pytest
import synapseclient

from challengeutils import permissions

SYN = create_autospec(synapseclient.Synapse)
SET_PERMS = {"set"}


@pytest.mark.parametrize(
    "entity,principalid,permission_level,mapped",
    [
        # tuple with (input, expectedOutput)
        (synapseclient.Project(), None, "view",
         permissions.ENTITY_PERMS_MAPPINGS['view']),
        (synapseclient.Folder(parentId="syn123"), None, "download",
         permissions.ENTITY_PERMS_MAPPINGS['download']),
        (synapseclient.Entity(), None, "edit",
         permissions.ENTITY_PERMS_MAPPINGS['edit']),
        (synapseclient.Schema(parentId="syn123"), None, "edit_and_delete",
         permissions.ENTITY_PERMS_MAPPINGS['edit_and_delete']),
        (synapseclient.File(parentId="syn123"), None, "admin",
         permissions.ENTITY_PERMS_MAPPINGS['admin']),
        (synapseclient.Entity(), None, "remove",
         permissions.ENTITY_PERMS_MAPPINGS['remove']),
        (synapseclient.Evaluation(contentSource="syn123"), None, "view",
         permissions.EVALUATION_PERMS_MAPPINGS['view']),
        (synapseclient.Evaluation(contentSource="syn123"), None, "submit",
         permissions.EVALUATION_PERMS_MAPPINGS['submit']),
        (synapseclient.Evaluation(contentSource="syn123"), None, "score",
         permissions.EVALUATION_PERMS_MAPPINGS['score']),
Example #26
def test_command_get_recursive_and_query():
    """Tests the 'synapse get -r' and 'synapse get -q' functions"""

    project_entity = project

    # Create Folders in Project
    folder_entity = syn.store(
        synapseclient.Folder(name=str(uuid.uuid4()), parent=project_entity))

    folder_entity2 = syn.store(
        synapseclient.Folder(name=str(uuid.uuid4()), parent=folder_entity))

    # Create and upload two files in sub-Folder
    uploaded_paths = []
    file_entities = []

    for i in range(2):
        f = utils.make_bogus_data_file()
        uploaded_paths.append(f)
        schedule_for_cleanup(f)
        file_entity = synapseclient.File(f, parent=folder_entity2)
        file_entity = syn.store(file_entity)
        file_entities.append(file_entity)
        schedule_for_cleanup(f)

    # Add a file in the Folder as well
    f = utils.make_bogus_data_file()
    uploaded_paths.append(f)
    schedule_for_cleanup(f)
    file_entity = synapseclient.File(f, parent=folder_entity)
    file_entity = syn.store(file_entity)
    file_entities.append(file_entity)

    # get -r uses syncFromSynapse() which uses getChildren(), which is not immediately consistent,
    # but faster than chunked queries.
    time.sleep(2)
    # Test recursive get
    run('synapse', '--skip-checks', 'get', '-r', folder_entity.id)
    # Verify that we downloaded files:
    new_paths = [
        os.path.join('.', folder_entity2.name, os.path.basename(f))
        for f in uploaded_paths[:-1]
    ]
    new_paths.append(os.path.join('.', os.path.basename(uploaded_paths[-1])))
    schedule_for_cleanup(folder_entity.name)
    for downloaded, uploaded in zip(new_paths, uploaded_paths):
        assert_true(os.path.exists(downloaded))
        assert_true(filecmp.cmp(downloaded, uploaded))
        schedule_for_cleanup(downloaded)

    # Test query get using a Table with an entity column
    # This should be replaced when Table File Views are implemented in the client
    cols = [synapseclient.Column(name='id', columnType='ENTITYID')]

    schema1 = syn.store(
        synapseclient.Schema(name='Foo Table',
                             columns=cols,
                             parent=project_entity))
    schedule_for_cleanup(schema1.id)

    data1 = [[x.id] for x in file_entities]

    syn.store(
        synapseclient.RowSet(schema=schema1,
                             rows=[synapseclient.Row(r) for r in data1]))

    time.sleep(3)  # get -q queries are eventually consistent
    # Test Table/View query get
    output = run('synapse', '--skip-checks', 'get', '-q',
                 "select id from %s" % schema1.id)
    # Verify that we downloaded files:
    new_paths = [
        os.path.join('.', os.path.basename(f)) for f in uploaded_paths[:-1]
    ]
    new_paths.append(os.path.join('.', os.path.basename(uploaded_paths[-1])))
    schedule_for_cleanup(folder_entity.name)
    for downloaded, uploaded in zip(new_paths, uploaded_paths):
        assert_true(os.path.exists(downloaded))
        assert_true(filecmp.cmp(downloaded, uploaded))
        schedule_for_cleanup(downloaded)

    schedule_for_cleanup(new_paths[0])
Example #27
    global synapse
    synapse = synapseclient.Synapse()
    synapse = synapseclient.login(args.synapseUser,
                                  args.synapsePassword,
                                  rememberMe=False)

    MAX_FILES_PER_FOLDER = 10
    # create 'n' files in sourceProject (say, 10 to a folder)
    folder = None
    folderCount = 0
    filesInFolder = 0
    for i in range(args.numberOfFiles):
        if folder is None or filesInFolder >= MAX_FILES_PER_FOLDER:
            # create folder
            folder = synapseclient.Folder(str(folderCount),
                                          parent=args.sourceProject)
            folder = synapse.store(folder)
            filesInFolder = 0
            folderCount = folderCount + 1
        # create file, upload to folder
        # just make an arbitrary string
        s = randomword(1000)
        filePath = os.path.join(tempfile.gettempdir(),
                                'file_' + str(i) + '.txt')
        with open(filePath, 'w') as myfile:
            myfile.write(s)
        file = synapseclient.File(
            filePath,
            parent=folder,
            annotations={'someAnnotName': 'someAnnotValue'})
        file = synapse.store(file)
Example #28
def test_command_line_using_paths():
    # Create a Project
    project_entity = syn.store(synapseclient.Project(name=str(uuid.uuid4())))
    schedule_for_cleanup(project_entity.id)

    # Create a Folder in Project
    folder_entity = syn.store(
        synapseclient.Folder(name=str(uuid.uuid4()), parent=project_entity))

    # Create and upload a file in Folder
    filename = utils.make_bogus_data_file()
    schedule_for_cleanup(filename)
    file_entity = syn.store(synapseclient.File(filename, parent=folder_entity))

    # Verify that we can use show with a filename
    output = run('synapse', '--skip-checks', 'show', filename)
    id = parse(r'File: %s\s+\((syn\d+)\)\s+' % os.path.split(filename)[1],
               output)
    assert_equals(file_entity.id, id)

    # Verify that limitSearch works by making sure we get the file entity
    # that's inside the folder
    file_entity2 = syn.store(
        synapseclient.File(filename, parent=project_entity))
    output = run('synapse', '--skip-checks', 'get', '--limitSearch',
                 folder_entity.id, filename)
    id = parse(r'Associated file: .* with synapse ID (syn\d+)', output)
    name = parse(r'Associated file: (.*) with synapse ID syn\d+', output)
    assert_equals(file_entity.id, id)
    assert_true(utils.equal_paths(name, filename))

    # Verify that set-provenance works with filepath
    repo_url = 'https://github.com/Sage-Bionetworks/synapsePythonClient'
    output = run('synapse', '--skip-checks', 'set-provenance', '-id',
                 file_entity2.id, '-name', 'TestActivity', '-description',
                 'A very excellent provenance', '-used', filename, '-executed',
                 repo_url, '-limitSearch', folder_entity.id)
    activity_id = parse(r'Set provenance record (\d+) on entity syn\d+',
                        output)

    output = run('synapse', '--skip-checks', 'get-provenance', '-id',
                 file_entity2.id)
    activity = json.loads(output)
    assert_equals(activity['name'], 'TestActivity')
    assert_equals(activity['description'], 'A very excellent provenance')

    # Verify that store works with provenance specified with filepath
    repo_url = 'https://github.com/Sage-Bionetworks/synapsePythonClient'
    filename2 = utils.make_bogus_data_file()
    schedule_for_cleanup(filename2)
    output = run('synapse', '--skip-checks', 'add', filename2, '-parentid',
                 project_entity.id, '-used', filename, '-executed',
                 '%s %s' % (repo_url, filename))
    entity_id = parse(r'Created/Updated entity:\s+(syn\d+)\s+', output)
    output = run('synapse', '--skip-checks', 'get-provenance', '-id',
                 entity_id)
    activity = json.loads(output)
    a = [a for a in activity['used'] if not a['wasExecuted']]
    assert_in(a[0]['reference']['targetId'], [file_entity.id, file_entity2.id])

    # Test associate command
    # I have two files in Synapse filename and filename2
    path = tempfile.mkdtemp()
    schedule_for_cleanup(path)
    shutil.copy(filename, path)
    shutil.copy(filename2, path)
    run('synapse', '--skip-checks', 'associate', path, '-r')
    run('synapse', '--skip-checks', 'show', filename)
Example #29
def test_migrate_project(request, syn, schedule_for_cleanup,
                         storage_location_id):
    test_name = request.node.name
    project_name = "{}-{}".format(test_name, uuid.uuid4())
    project = synapseclient.Project(name=project_name)
    project_entity = syn.store(project)

    file_0_path = _create_temp_file()
    schedule_for_cleanup(file_0_path)
    file_0_name = "{}-{}".format(test_name, 1)
    file_0 = synapseclient.File(name=file_0_name,
                                path=file_0_path,
                                parent=project_entity)
    file_0_entity = syn.store(file_0)
    default_storage_location_id = file_0_entity._file_handle[
        'storageLocationId']

    folder_1_name = "{}-{}-{}".format(test_name, 1, uuid.uuid4())
    folder_1 = synapseclient.Folder(parent=project_entity, name=folder_1_name)
    folder_1_entity = syn.store(folder_1)

    file_1_path = _create_temp_file()
    schedule_for_cleanup(file_1_path)
    file_1_name = "{}-{}".format(test_name, 1)
    file_1 = synapseclient.File(name=file_1_name,
                                path=file_1_path,
                                parent=folder_1_entity)
    file_1_entity = syn.store(file_1)

    file_2_path = _create_temp_file()
    schedule_for_cleanup(file_2_path)
    file_2_name = "{}-{}".format(test_name, 2)
    file_2 = synapseclient.File(name=file_2_name,
                                path=file_2_path,
                                parent=folder_1_entity)
    file_2_entity = syn.store(file_2)

    # file 3 shares the same file handle id as file 1
    file_3_path = file_1_path
    file_3_name = "{}-{}".format(test_name, 3)
    file_3 = synapseclient.File(name=file_3_name,
                                path=file_3_path,
                                parent=folder_1_entity)
    file_3.dataFileHandleId = file_1_entity.dataFileHandleId
    file_3_entity = syn.store(file_3)

    table_1_cols = [
        synapseclient.Column(name='file_col_1', columnType='FILEHANDLEID'),
        synapseclient.Column(name='num', columnType='INTEGER'),
        synapseclient.Column(name='file_col_2', columnType='FILEHANDLEID'),
    ]
    table_1 = syn.store(
        synapseclient.Schema(name=test_name,
                             columns=table_1_cols,
                             parent=folder_1_entity))
    table_1_file_col_1_1 = _create_temp_file()
    table_1_file_handle_1 = syn.uploadFileHandle(table_1_file_col_1_1, table_1)
    table_1_file_col_1_2 = _create_temp_file()
    table_1_file_handle_2 = syn.uploadFileHandle(table_1_file_col_1_2, table_1)
    table_1_file_col_2_1 = _create_temp_file()
    table_1_file_handle_3 = syn.uploadFileHandle(table_1_file_col_2_1, table_1)
    table_1_file_col_2_2 = _create_temp_file()
    table_1_file_handle_4 = syn.uploadFileHandle(table_1_file_col_2_2, table_1)

    data = [
        [table_1_file_handle_1['id'], 1, table_1_file_handle_2['id']],
        [table_1_file_handle_3['id'], 2, table_1_file_handle_4['id']],
    ]

    table_1_entity = syn.store(
        synapseclient.RowSet(schema=table_1,
                             rows=[synapseclient.Row(r) for r in data]))

    db_path = tempfile.NamedTemporaryFile(delete=False).name
    schedule_for_cleanup(db_path)

    index_result = synapseutils.index_files_for_migration(
        syn,
        project_entity,
        storage_location_id,
        db_path,
        file_version_strategy='new',
        include_table_files=True,
    )

    counts_by_status = index_result.get_counts_by_status()
    assert counts_by_status['INDEXED'] == 8
    assert counts_by_status['ERRORED'] == 0

    migration_result = synapseutils.migrate_indexed_files(syn,
                                                          db_path,
                                                          force=True)

    file_0_entity_updated = syn.get(utils.id_of(file_0_entity),
                                    downloadFile=False)
    file_1_entity_updated = syn.get(utils.id_of(file_1_entity),
                                    downloadFile=False)
    file_2_entity_updated = syn.get(utils.id_of(file_2_entity),
                                    downloadFile=False)
    file_3_entity_updated = syn.get(utils.id_of(file_3_entity),
                                    downloadFile=False)
    file_handles = [
        f['_file_handle'] for f in (
            file_0_entity_updated,
            file_1_entity_updated,
            file_2_entity_updated,
            file_3_entity_updated,
        )
    ]

    table_1_id = utils.id_of(table_1_entity)
    results = syn.tableQuery("select file_col_1, file_col_2 from {}".format(
        utils.id_of(table_1_entity)))
    table_file_handles = []
    for row in results:
        for file_handle_id in row[2:]:
            file_handle = syn._getFileHandleDownload(
                file_handle_id, table_1_id,
                objectType='TableEntity')['fileHandle']
            table_file_handles.append(file_handle)
    file_handles.extend(table_file_handles)

    _assert_storage_location(file_handles, storage_location_id)
    assert storage_location_id != default_storage_location_id

    with sqlite3.connect(db_path) as conn:
        cursor = conn.cursor()
        query_result = cursor.execute(
            "select status, count(*) from migrations where type in (?, ?) group by status",
            (_MigrationType.FILE.value,
             _MigrationType.TABLE_ATTACHED_FILE.value)).fetchall()

        counts = {r[0]: r[1] for r in query_result}

        # should only be one status and they should all be migrated
        # should be 3 migrated files entities + 4 migrated table attached files
        assert len(counts) == 1
        assert counts[_MigrationStatus.MIGRATED.value] == 8

    csv_file = tempfile.NamedTemporaryFile(delete=False)
    schedule_for_cleanup(csv_file.name)
    migration_result.as_csv(csv_file.name)
    with open(csv_file.name, 'r') as csv_file_in:
        csv_contents = csv_file_in.read()

    table_1_id = table_1_entity['tableId']

    # assert the content of the csv. we don't assert any particular order of the lines
    # but the presence of the expected lines and the correct # of lines
    csv_lines = csv_contents.split('\n')
    assert "id,type,version,row_id,col_name,from_storage_location_id,from_file_handle_id,to_file_handle_id,status,exception" in csv_lines  # noqa
    assert f"{file_0_entity.id},file,,,,{default_storage_location_id},{file_0_entity.dataFileHandleId},{file_0_entity_updated.dataFileHandleId},MIGRATED," in csv_lines  # noqa
    assert f"{file_1_entity.id},file,,,,{default_storage_location_id},{file_1_entity.dataFileHandleId},{file_1_entity_updated.dataFileHandleId},MIGRATED," in csv_lines  # noqa
    assert f"{file_2_entity.id},file,,,,{default_storage_location_id},{file_2_entity.dataFileHandleId},{file_2_entity_updated.dataFileHandleId},MIGRATED," in csv_lines  # noqa
    assert f"{file_3_entity.id},file,,,,{default_storage_location_id},{file_3_entity.dataFileHandleId},{file_3_entity_updated.dataFileHandleId},MIGRATED," in csv_lines  # noqa
    assert f"{table_1_id},table,1,1,file_col_1,{default_storage_location_id},{table_1_file_handle_1['id']},{table_file_handles[0]['id']},MIGRATED," in csv_lines  # noqa
    assert f"{table_1_id},table,1,1,file_col_2,{default_storage_location_id},{table_1_file_handle_2['id']},{table_file_handles[1]['id']},MIGRATED," in csv_lines  # noqa
    assert f"{table_1_id},table,1,2,file_col_1,{default_storage_location_id},{table_1_file_handle_3['id']},{table_file_handles[2]['id']},MIGRATED," in csv_lines  # noqa
    assert f"{table_1_id},table,1,2,file_col_2,{default_storage_location_id},{table_1_file_handle_4['id']},{table_file_handles[3]['id']},MIGRATED," in csv_lines  # noqa
    assert "" in csv_lines  # expect trailing newline in a csv
Example #30
def test_command_line_client():
    print("TESTING CMD LINE CLIENT")
    # Create a Project
    output = run('synapse', '--skip-checks', 'create', '-name',
                 str(uuid.uuid4()), '-description',
                 'test of command line client', 'Project')
    project_id = parse(r'Created entity:\s+(syn\d+)\s+', output)
    schedule_for_cleanup(project_id)

    # Create a File
    filename = utils.make_bogus_data_file()
    schedule_for_cleanup(filename)
    output = run('synapse', '--skip-checks', 'add', '-name', 'BogusFileEntity',
                 '-description', 'Bogus data to test file upload', '-parentid',
                 project_id, filename)
    file_entity_id = parse(r'Created/Updated entity:\s+(syn\d+)\s+', output)

    # Verify that we stored the file in Synapse
    f1 = syn.get(file_entity_id)
    fh = syn._getFileHandle(f1.dataFileHandleId)
    assert_equals(fh['concreteType'],
                  'org.sagebionetworks.repo.model.file.S3FileHandle')

    # Get File from the command line
    output = run('synapse', '--skip-checks', 'get', file_entity_id)
    downloaded_filename = parse(r'Downloaded file:\s+(.*)', output)
    schedule_for_cleanup(downloaded_filename)
    assert_true(os.path.exists(downloaded_filename))
    assert_true(filecmp.cmp(filename, downloaded_filename))

    # Update the File
    filename = utils.make_bogus_data_file()
    schedule_for_cleanup(filename)
    output = run('synapse', '--skip-checks', 'store', '--id', file_entity_id,
                 filename)

    # Get the File again
    output = run('synapse', '--skip-checks', 'get', file_entity_id)
    downloaded_filename = parse(r'Downloaded file:\s+(.*)', output)
    schedule_for_cleanup(downloaded_filename)
    assert_true(os.path.exists(downloaded_filename))
    assert_true(filecmp.cmp(filename, downloaded_filename))

    # Store the same file and don't force a new version

    # Get the existing file to determine its current version
    current_file = syn.get(file_entity_id, downloadFile=False)
    current_version = current_file.versionNumber

    # Store it without forcing version
    output = run('synapse', '--skip-checks', 'store', '--noForceVersion',
                 '--id', file_entity_id, filename)

    # Get the File again and check that the version did not change
    new_file = syn.get(file_entity_id, downloadFile=False)
    new_version = new_file.versionNumber
    assert_equals(current_version, new_version)

    # Move the file to new folder
    folder = syn.store(synapseclient.Folder(parentId=project_id))
    output = run('synapse', 'mv', '--id', file_entity_id, '--parentid',
                 folder.id)
    movedFile = syn.get(file_entity_id, downloadFile=False)
    assert_equals(movedFile.parentId, folder.id)

    # Test Provenance
    repo_url = 'https://github.com/Sage-Bionetworks/synapsePythonClient'
    output = run('synapse', '--skip-checks', 'set-provenance', '-id',
                 file_entity_id, '-name', 'TestActivity', '-description',
                 'A very excellent provenance', '-used', file_entity_id,
                 '-executed', repo_url)

    output = run('synapse', '--skip-checks', 'get-provenance', '--id',
                 file_entity_id)

    activity = json.loads(output)
    assert_equals(activity['name'], 'TestActivity')
    assert_equals(activity['description'], 'A very excellent provenance')

    used = utils._find_used(activity, lambda used: 'reference' in used)
    assert_equals(used['reference']['targetId'], file_entity_id)

    used = utils._find_used(activity, lambda used: 'url' in used)
    assert_equals(used['url'], repo_url)
    assert_true(used['wasExecuted'])

    # Note: Tests shouldn't have external dependencies
    #       but this is a pretty picture of Singapore
    singapore_url = 'http://upload.wikimedia.org/wikipedia/commons/' \
                    'thumb/3/3e/1_singapore_city_skyline_dusk_panorama_2011.jpg' \
                    '/1280px-1_singapore_city_skyline_dusk_panorama_2011.jpg'

    # Test external file handle
    output = run('synapse', '--skip-checks', 'add', '-name', 'Singapore',
                 '-description', 'A nice picture of Singapore', '-parentid',
                 project_id, singapore_url)
    external_entity_id = parse(r'Created/Updated entity:\s+(syn\d+)\s+', output)

    # Verify that we created an external file handle
    f2 = syn.get(external_entity_id)
    fh = syn._getFileHandle(f2.dataFileHandleId)
    assert_equals(fh['concreteType'],
                  'org.sagebionetworks.repo.model.file.ExternalFileHandle')

    output = run('synapse', '--skip-checks', 'get', external_entity_id)
    downloaded_filename = parse(r'Downloaded file:\s+(.*)', output)
    schedule_for_cleanup(downloaded_filename)
    assert_true(os.path.exists(downloaded_filename))

    # Delete the Project
    run('synapse', '--skip-checks', 'delete', project_id)