def test_get_local_file():
    """Tests synapse.get() with local a local file """
    new_path = utils.make_bogus_data_file()
    schedule_for_cleanup(new_path)
    folder = Folder('TestFindFileFolder', parent=project, description='A place to put my junk')
    folder = syn.createEntity(folder)

    # Get a nonexistent file in Synapse
    assert_raises(SynapseError, syn.get, new_path)

    # Get a file actually stored in Synapse
    ent_folder = syn.store(File(new_path, parent=folder))
    ent2 = syn.get(new_path)
    assert ent_folder.id==ent2.id and ent_folder.versionNumber==ent2.versionNumber

    # Get a file stored in multiple locations (should display a warning)
    ent = syn.store(File(new_path, parent=project))
    ent = syn.get(new_path)

    # Get a file stored in multiple locations with limitSearch set
    ent = syn.get(new_path, limitSearch=folder.id)
    assert ent.id == ent_folder.id and ent.versionNumber==ent_folder.versionNumber

    # Get a file that exists, but where limitSearch filters it out, raising an error
    assert_raises(SynapseError, syn.get, new_path, limitSearch='syn1')
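
# The tests in this file rely on two shared helpers defined elsewhere in the
# test suite: utils.make_bogus_data_file() / make_bogus_binary_file() and
# schedule_for_cleanup(). A minimal sketch of what such helpers could look
# like (the bodies here are assumptions, not the actual synapseclient
# fixtures):
def _sketch_make_bogus_data_file():
    import os, tempfile, uuid
    fd, path = tempfile.mkstemp(suffix='.txt')
    with os.fdopen(fd, 'w') as f:
        f.write(str(uuid.uuid4()))  # small throwaway payload
    return path

_cleanup_items = []

def _sketch_schedule_for_cleanup(item):
    # remember a local path or Synapse entity so a teardown hook can delete it
    _cleanup_items.append(item)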
def test_get_and_store_by_name_and_parent_id():
    project = create_project()

    path = utils.make_bogus_data_file()
    schedule_for_cleanup(path)

    f = File(path, name='Foobarbat', parent=project)
    f2 = syn.store(f)
    f = syn.get(f)

    assert f.id == f2.id
    assert f.name == f2.name
    assert f.parentId == f2.parentId

    ## new file
    path = utils.make_bogus_data_file()
    schedule_for_cleanup(path)

    ## should create a new version of the previous File entity
    f3 = File(path,
              name='Foobarbat',
              parent=project,
              description='banana',
              junk=1234)
    f3 = syn.store(f3)

    ## should be an update of the existing entity with the same name and parent
    assert f3.id == f.id
    assert f3.description == 'banana'
    assert f3.junk == [1234]
    assert filecmp.cmp(path, f3.path)
def test_getChildren(syn, schedule_for_cleanup):
    # setup a hierarchy for folders
    # PROJECT
    # |     \
    # File   Folder
    #           |
    #         File
    project_name = str(uuid.uuid1())
    test_project = syn.store(Project(name=project_name))
    folder = syn.store(Folder(name="firstFolder", parent=test_project))
    syn.store(
        File(path="~/doesntMatter.txt",
             name="file inside folders",
             parent=folder,
             synapseStore=False))
    project_file = syn.store(
        File(path="~/doesntMatterAgain.txt",
             name="file inside project",
             parent=test_project,
             synapseStore=False))
    schedule_for_cleanup(test_project)

    expected_id_set = {project_file.id, folder.id}
    children_id_set = {x['id'] for x in syn.getChildren(test_project.id)}
    assert expected_id_set == children_id_set
def test_store_activity():
    # Create a File and an Activity
    path = utils.make_bogus_binary_file()
    schedule_for_cleanup(path)
    entity = File(path, name='Hinkle horn honking holes', parent=project)
    honking = Activity(name='Hinkle horn honking', 
                       description='Nettlebed Cave is a limestone cave located on the South Island of New Zealand.')
    honking.used('http://www.flickr.com/photos/bevanbfree/3482259379/')
    honking.used('http://www.flickr.com/photos/bevanbfree/3482185673/')

    # This doesn't set the ID of the Activity
    entity = syn.store(entity, activity=honking)

    # But this does
    honking = syn.getProvenance(entity.id)

    # Verify the Activity
    assert honking['name'] == 'Hinkle horn honking'
    assert len(honking['used']) == 2
    assert honking['used'][0]['concreteType'] == 'org.sagebionetworks.repo.model.provenance.UsedURL'
    assert honking['used'][0]['wasExecuted'] == False
    assert honking['used'][0]['url'].startswith('http://www.flickr.com/photos/bevanbfree/3482')
    assert honking['used'][1]['concreteType'] == 'org.sagebionetworks.repo.model.provenance.UsedURL'
    assert honking['used'][1]['wasExecuted'] == False

    # Store another Entity with the same Activity
    entity = File('http://en.wikipedia.org/wiki/File:Nettlebed_cave.jpg', 
                  name='Nettlebed Cave', parent=project, synapseStore=False)
    entity = syn.store(entity, activity=honking)

    # The Activities should match
    honking2 = syn.getProvenance(entity)
    assert honking['id'] == honking2['id']
def test_syncFromSynapse():
    """This function tests recursive download as defined in syncFromSynapse
    most of the functionality of this function are already tested in the 
    tests/integration/test_command_line_client::test_command_get_recursive_and_query

    which means that the only test if for path=None
    """
    # Create a Project
    project_entity = syn.store(synapseclient.Project(name=str(uuid.uuid4())))
    schedule_for_cleanup(project_entity.id)

    # Create a Folder in Project
    folder_entity = syn.store(
        Folder(name=str(uuid.uuid4()), parent=project_entity))

    # Create and upload two files in Folder
    uploaded_paths = []
    for i in range(2):
        f = utils.make_bogus_data_file()
        uploaded_paths.append(f)
        schedule_for_cleanup(f)
        syn.store(File(f, parent=folder_entity))
    # Add a file in the project level as well
    f = utils.make_bogus_data_file()
    uploaded_paths.append(f)
    schedule_for_cleanup(f)
    syn.store(File(f, parent=project_entity))

    # Test recursive get
    output = synapseutils.syncFromSynapse(syn, project_entity)

    assert_equals(len(output), len(uploaded_paths))
    for f in output:
        assert_in(f.path, uploaded_paths)
def test_synapseStore_flag():
    # Store a path to a local file
    path = utils.make_bogus_data_file()
    schedule_for_cleanup(path)
    bogus = File(path, name='Totally bogus data', parent=project, synapseStore=False)
    bogus = syn.store(bogus)
    
    # Verify the thing can be downloaded as a URL
    bogus = syn.get(bogus, downloadFile=False)
    assert bogus.name == 'Totally bogus data'
    assert bogus.path == path, "Path: %s\nExpected: %s" % (bogus.path, path)
    assert bogus.synapseStore == False

    # Make sure the test runs on Windows and other OS's
    if path[0].isalpha() and path[1]==':':
        # A Windows file URL looks like this: file:///c:/foo/bar/bat.txt
        expected_url = 'file:///' + path.replace("\\","/")
    else:
        expected_url = 'file://' + path

    assert bogus.externalURL == expected_url, 'URL: %s\nExpected %s' % (bogus.externalURL, expected_url)

    # A file path that doesn't exist should still work
    bogus = File('/path/to/local/file1.xyz', parentId=project.id, synapseStore=False)
    bogus = syn.store(bogus)
    assert_raises(IOError, syn.get, bogus)
    assert bogus.synapseStore == False

    # Try a URL
    bogus = File('http://dev-versions.synapse.sagebase.org/synapsePythonClient', parent=project, synapseStore=False)
    bogus = syn.store(bogus)
    bogus = syn.get(bogus)
    assert bogus.synapseStore == False
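
# The Windows/POSIX branching above builds the same file:// URL that the
# client's utils.as_url helper produces (as_url is compared against directly
# in test_store__changing_from_Synapse_to_externalURL_by_changing_path further
# below). A hedged equivalence check, assuming as_url follows that convention:
def _check_expected_file_url(path):
    if path[0].isalpha() and path[1] == ':':
        # a Windows file URL looks like this: file:///c:/foo/bar/bat.txt
        expected = 'file:///' + path.replace("\\", "/")
    else:
        expected = 'file://' + path
    assert utils.as_url(path) == expected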
    def test_upload__error(self, syn):
        """Verify that if an item upload fails the error is raised in the main thread
        and any running Futures are cancelled"""

        item_1 = _SyncUploadItem(File(path='/tmp/foo', parentId='syn123'), [],
                                 [], {})
        item_2 = _SyncUploadItem(File(path='/tmp/bar', parentId='syn123'), [],
                                 [], {})
        items = [item_1, item_2]

        def syn_store_side_effect(entity, *args, **kwargs):
            # fail the upload of the first item only
            if entity.path == item_1.entity.path:
                raise ValueError()
            return Mock()

        uploader = _SyncUploader(syn, get_executor())
        original_abort = uploader._abort

        def abort_side_effect(futures):
            return original_abort(futures)

        with patch.object(syn, 'store') as mock_syn_store, \
                patch.object(uploader, '_abort') as mock_abort:

            mock_syn_store.side_effect = syn_store_side_effect
            mock_abort.side_effect = abort_side_effect
            with pytest.raises(ValueError):
                uploader.upload(items)

            # the upload should have been aborted with the pending Futures
            mock_abort.assert_called_once_with([ANY])
            futures = mock_abort.call_args_list[0][0][0]
            assert isinstance(futures[0], Future)
def test_extract_file_entity_metadata__ensure_correct_row_metadata(syn):
    # Test for SYNPY-692, where 'contentType' was incorrectly set on all rows except for the very first row.

    # create 2 file entities with different metadata
    entity1 = File(parent='syn123',
                   id='syn456',
                   contentType='text/json',
                   path='path1',
                   name='entity1',
                   synapseStore=True)
    entity2 = File(parent='syn789',
                   id='syn890',
                   contentType='text/html',
                   path='path2',
                   name='entity2',
                   synapseStore=False)
    files = [entity1, entity2]

    # we don't care about provenance metadata in this case
    with patch.object(synapseutils.sync,
                      "_get_file_entity_provenance_dict",
                      return_value={}):
        # method under test
        keys, data = synapseutils.sync._extract_file_entity_metadata(
            syn, files)

    # compare source entity metadata against the extracted metadata
    for file_entity, file_row_data in zip(files, data):
        for key in keys:
            if key == 'parent':  # workaround for parent/parentId inconsistency (SYNPY-697)
                assert file_entity.get('parentId') == file_row_data.get(key)
            else:
                assert file_entity.get(key) == file_row_data.get(key)
def test_store_with_create_or_update_flag():
    project = create_project()

    filepath = utils.make_bogus_binary_file()
    bogus1 = File(filepath, name='Bogus Test File', parent=project)

    bogus1 = syn.store(bogus1, createOrUpdate=True)

    # Create a different file with the same name and parent
    new_filepath = utils.make_bogus_binary_file()
    bogus1.path = new_filepath

    # Expected behavior is that a new version of the first File will be created
    bogus2 = syn.store(bogus1, createOrUpdate=True)

    assert bogus2.id == bogus1.id
    assert bogus2.versionNumber == 2
    assert not filecmp.cmp(bogus2.path, filepath)

    bogus2a = syn.get(bogus2.id)
    assert bogus2a.id == bogus1.id
    assert bogus2a.versionNumber == 2
    assert filecmp.cmp(bogus2.path, bogus2a.path)

    # Create yet another file with the same name and parent
    newer_filepath = utils.make_bogus_binary_file()
    bogus3 = File(newer_filepath, name='Bogus Test File', parent=project)

    # Expected behavior is raising an exception with a 409 error
    assert_raises(requests.exceptions.HTTPError,
                  syn.store,
                  bogus3,
                  createOrUpdate=False)
    def _view_setup(cls):
        # set up a file view
        folder = syn.store(
            Folder(name="PartialRowTestFolder" + str(uuid.uuid4()),
                   parent=project))
        syn.store(
            File("~/path/doesnt/matter",
                 name="f1",
                 parent=folder,
                 synapseStore=False))
        syn.store(
            File("~/path/doesnt/matter/again",
                 name="f2",
                 parent=folder,
                 synapseStore=False))

        cols = [
            Column(name='foo', columnType='STRING', maximumSize=1000),
            Column(name='bar', columnType='STRING')
        ]
        return syn.store(
            EntityViewSchema(name='PartialRowTestViews' + str(uuid.uuid4()),
                             columns=cols,
                             addDefaultViewColumns=False,
                             parent=project,
                             scopes=[folder]))
def test_syncFromSynapse__manifest_is_root(
        mock__get_file_entity_provenance_dict, mock_generateManifest, syn):
    """
    Verify that passing manifest="root" to syncFromSynapse creates only the
    root manifest file.
    """

    project = Project(name="the project", parent="whatever", id="syn123")
    file1 = File(name="a file", parent=project, id="syn456")
    folder = Folder(name="a folder", parent=project, id="syn789")
    file2 = File(name="a file2", parent=folder, id="syn789123")

    # Structure of nested project
    # project
    #    |---> file1
    #    |---> folder
    #             |---> file2

    entities = {
        file1.id: file1,
        folder.id: folder,
        file2.id: file2,
    }

    def syn_get_side_effect(entity, *args, **kwargs):
        return entities[id_of(entity)]

    mock__get_file_entity_provenance_dict.return_value = {}

    with patch.object(syn, "getChildren", side_effect=[[folder, file1], [file2]]),\
            patch.object(syn, "get", side_effect=syn_get_side_effect) as patch_syn_get:

        synapseutils.syncFromSynapse(syn,
                                     project,
                                     path="./",
                                     downloadFile=False,
                                     manifest="root")
        assert patch_syn_get.call_args_list == [
            call(
                file1['id'],
                downloadLocation="./",
                ifcollision='overwrite.local',
                followLink=False,
                downloadFile=False,
            ),
            call(
                file2['id'],
                downloadLocation="./a folder",
                ifcollision='overwrite.local',
                followLink=False,
                downloadFile=False,
            )
        ]

        assert mock_generateManifest.call_count == 1

        call_files = mock_generateManifest.call_args_list[0][0][1]
        assert len(call_files) == 2
        assert call_files[0].id == "syn456"
        assert call_files[1].id == "syn789123"
def test_walk():
    walked = []
    firstfile = utils.make_bogus_data_file()
    schedule_for_cleanup(firstfile)
    project_entity = syn.store(Project(name=str(uuid.uuid4())))
    schedule_for_cleanup(project_entity.id)
    folder_entity = syn.store(
        Folder(name=str(uuid.uuid4()), parent=project_entity))
    schedule_for_cleanup(folder_entity.id)
    second_folder = syn.store(
        Folder(name=str(uuid.uuid4()), parent=project_entity))
    schedule_for_cleanup(second_folder.id)
    file_entity = syn.store(File(firstfile, parent=project_entity))
    schedule_for_cleanup(file_entity.id)

    walked.append(((project_entity.name, project_entity.id), [
        (folder_entity.name, folder_entity.id),
        (second_folder.name, second_folder.id)
    ], [(file_entity.name, file_entity.id)]))

    nested_folder = syn.store(
        Folder(name=str(uuid.uuid4()), parent=folder_entity))
    schedule_for_cleanup(nested_folder.id)
    secondfile = utils.make_bogus_data_file()
    schedule_for_cleanup(secondfile)
    second_file = syn.store(File(secondfile, parent=nested_folder))
    schedule_for_cleanup(second_file.id)
    thirdfile = utils.make_bogus_data_file()
    schedule_for_cleanup(thirdfile)
    third_file = syn.store(File(thirdfile, parent=second_folder))
    schedule_for_cleanup(third_file.id)

    walked.append(((os.path.join(project_entity.name,
                                 folder_entity.name), folder_entity.id),
                   [(nested_folder.name, nested_folder.id)], []))
    walked.append(
        ((os.path.join(os.path.join(project_entity.name, folder_entity.name),
                       nested_folder.name), nested_folder.id), [],
         [(second_file.name, second_file.id)]))
    walked.append(((os.path.join(project_entity.name, second_folder.name),
                    second_folder.id), [], [(third_file.name, third_file.id)]))

    temp = synapseutils.walk(syn, project_entity.id)
    temp = list(temp)
    # Must sort the lists inside each tuple because order matters for the assert;
    # folders are returned in a different order depending on their names
    for i in walked:
        for x in i:
            if isinstance(x, list):
                x.sort()
    for i in temp:
        for x in i:
            if isinstance(x, list):
                x.sort()
        assert i in walked

    print("CHECK: synapseutils.walk on a file should return empty generator")
    temp = synapseutils.walk(syn, second_file.id)
    assert list(temp) == []
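
# synapseutils.walk mirrors os.walk, except every name is paired with its
# Synapse ID: it yields ((dirpath, dirpath_id), [(dirname, id), ...],
# [(filename, id), ...]) tuples, which is exactly the shape the expected
# `walked` list above is built from. A short sketch of consuming it:
def _print_walk_tree(syn, root_id):
    for (dirpath, dirpath_id), dirs, files in synapseutils.walk(syn, root_id):
        for name, syn_id in files:
            print(os.path.join(dirpath, name), syn_id)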
def test_synapseStore_flag():
    """Test storing entities while setting the synapseStore flag to False"""
    project = create_project()

    ## store a path to a local file (synapseStore=False)
    path = utils.make_bogus_data_file()
    schedule_for_cleanup(path)
    f1 = File(path,
              name='Totally bogus data',
              parent=project,
              synapseStore=False)

    f1 = syn.store(f1)

    f1a = syn.get(f1.id, downloadFile=False)

    assert f1a.name == 'Totally bogus data'
    assert f1a.path == path, 'path=' + str(f1a.path) + '; expected=' + path
    assert f1a.synapseStore == False

    ## make sure the test runs on Windows and other OS's
    if path[0].isalpha() and path[1] == ':':
        ## a windows file URL looks like this: file:///c:/foo/bar/bat.txt
        expected_url = 'file:///' + path.replace("\\", "/")
    else:
        expected_url = 'file://' + path

    assert f1a.externalURL == expected_url, 'unexpected externalURL: ' + f1a.externalURL

    ## a file path that doesn't exist should still work
    f2 = File('/path/to/local/file1.xyz',
              parentId=project.id,
              synapseStore=False)
    f2 = syn.store(f2)
    try:
        syn.get(f2)
        assert False
    except Exception as err:
        assert str(err).startswith("Could not download non-existent file")
    assert f2.synapseStore == False

    ## Try a URL
    f3 = File('http://dev-versions.synapse.sagebase.org/synapsePythonClient',
              parent=project,
              synapseStore=False)
    f3 = syn.store(f3)
    f3a = syn.get(f3)
    assert f3a.synapseStore == False
def test_round_trip(syn, project, schedule_for_cleanup):
    fhid = None
    filepath = utils.make_bogus_binary_file(MIN_PART_SIZE + 777771)
    try:
        fhid = multipart_upload_file(syn, filepath)

        # Download the file and compare it with the original
        junk = File(parent=project, dataFileHandleId=fhid)
        junk.properties.update(syn._createEntity(junk.properties))
        (tmp_f, tmp_path) = tempfile.mkstemp()
        schedule_for_cleanup(tmp_path)

        junk['path'] = syn._downloadFileHandle(fhid, junk['id'], 'FileEntity',
                                               tmp_path)
        assert filecmp.cmp(filepath, junk.path)

    finally:
        try:
            if 'junk' in locals():
                syn.delete(junk)
        except Exception:
            print(traceback.format_exc())
        try:
            os.remove(filepath)
        except Exception:
            print(traceback.format_exc())
def test_multipart_upload_big_string(syn, project, schedule_for_cleanup):
    cities = [
        "Seattle", "Portland", "Vancouver", "Victoria", "San Francisco",
        "Los Angeles", "New York", "Oaxaca", "Cancún", "Curaçao", "जोधपुर",
        "অসম", "ལྷ་ས།", "ཐིམ་ཕུ་", "دبي", "አዲስ አበባ", "São Paulo",
        "Buenos Aires", "Cartagena", "Amsterdam", "Venice", "Rome",
        "Dubrovnik", "Sarajevo", "Madrid", "Barcelona", "Paris", "Αθήνα",
        "Ρόδος", "København", "Zürich", "金沢市", "서울", "แม่ฮ่องสอน", "Москва"
    ]

    text = "Places I wanna go:\n"
    while len(text.encode('utf-8')) < MIN_PART_SIZE:
        text += ", ".join(random.choice(cities) for i in range(5000)) + "\n"

    fhid = multipart_upload_string(syn, text)

    # Download the file and compare it with the original
    junk = File(parent=project, dataFileHandleId=fhid)
    junk.properties.update(syn._createEntity(junk.properties))
    (tmp_f, tmp_path) = tempfile.mkstemp()
    schedule_for_cleanup(tmp_path)

    junk['path'] = syn._downloadFileHandle(fhid, junk['id'], "FileEntity",
                                           tmp_path)

    with open(junk.path, encoding='utf-8') as f:
        retrieved_text = f.read()

    assert retrieved_text == text
def test_ftp_download():
    """Test downloading an Entity that points to a file on an FTP server. """

    # Use an external reference: we only need to test FTP download, not upload,
    # and this way we don't have to maintain an FTP server just for this purpose.
    # Make an entity that points to a file on an FTP server.
    entity = File(parent=project['id'], name='1KB.zip')
    fileHandle = {}
    fileHandle['externalURL'] = 'ftp://speedtest.tele2.net/1KB.zip'
    fileHandle["fileName"] = entity.name
    fileHandle["contentType"] = "application/zip"
    fileHandle["contentMd5"] = '0f343b0931126a20f133d67c2b018a3b'
    fileHandle["contentSize"] = 1024
    fileHandle["concreteType"] = "org.sagebionetworks.repo.model.file.ExternalFileHandle"
    fileHandle = syn.restPOST('/externalFileHandle', json.dumps(fileHandle),
                              syn.fileHandleEndpoint)
    entity.dataFileHandleId = fileHandle['id']
    entity = syn.store(entity)

    # Download the entity and check that MD5 matches expected
    FTPfile = syn.get(entity.id,
                      downloadLocation=os.getcwd(),
                      downloadFile=True)
    assert FTPfile.md5 == utils.md5_for_file(FTPfile.path).hexdigest()
    schedule_for_cleanup(entity)
    os.remove(FTPfile.path)
def test_syncFromSynapse__given_file_id(test_state):
    file_path = utils.make_bogus_data_file()
    test_state.schedule_for_cleanup(file_path)
    file = test_state.syn.store(File(file_path, name=str(uuid.uuid4()), parent=test_state.project, synapseStore=False))
    all_files = synapseutils.syncFromSynapse(test_state.syn, file.id)
    assert 1 == len(all_files)
    assert file == all_files[0]
def test_uploadFileEntity(syn, project, schedule_for_cleanup):
    # Create a FileEntity
    # Dictionaries default to FileEntity as a type
    fname = utils.make_bogus_data_file()
    schedule_for_cleanup(fname)
    entity = File(name='fooUploadFileEntity',
                  path=fname,
                  parentId=project['id'],
                  description='A test file entity')
    entity = syn.store(entity)

    # Download and verify
    entity = syn.get(entity)

    assert entity['files'][0] == os.path.basename(fname)
    assert filecmp.cmp(fname, entity['path'])

    # Check that the stored file produced the right type of file handle
    fh = syn.restGET('/entity/%s/filehandles' % entity.id)['list'][0]
    assert fh['concreteType'] == 'org.sagebionetworks.repo.model.file.S3FileHandle'

    # Create a different temporary file
    fname = utils.make_bogus_data_file()
    schedule_for_cleanup(fname)

    # Update existing FileEntity
    entity.path = fname
    entity = syn.store(entity)

    # Download and verify that it is the same file
    entity = syn.get(entity)
    assert entity['files'][0] == os.path.basename(fname)
    assert filecmp.cmp(fname, entity['path'])
def test_create_Link_to_entity_with_the_same_parent():
    parent = "syn123"
    file = File("new file", parent=parent, id="syn456")
    file_bundle = {
        'accessControlList': '/repo/v1/entity/syn456/acl',
        'entityType': 'org.sagebionetworks.repo.model.FileEntity',
        'annotations': '/repo/v1/entity/syn456/annotations',
        'uri': '/repo/v1/entity/syn456',
        'createdOn': '2018-08-27T20:48:43.562Z',
        'parentId': 'syn123',
        'versionNumber': 1,
        'dataFileHandleId': '3594',
        'modifiedOn': '2018-08-27T20:48:44.938Z',
        'versionLabel': '1',
        'createdBy': '1',
        'versions': '/repo/v1/entity/syn456/version',
        'name': 'new file',
        'concreteType': 'org.sagebionetworks.repo.model.FileEntity',
        'etag': '62fd1a76-ed9c-425a-b4a8-1c4e6aad7fc6',
        'modifiedBy': '1',
        'id': 'syn456',
        'versionUrl': '/repo/v1/entity/syn456/version/1'
    }
    link = Link(targetId=file, parent=parent)
    syn = Synapse(skip_checks=True)
    with patch.object(syn, "_getEntity", return_value=file_bundle):
        pytest.raises(ValueError, syn.store, link)
def addExternalDataToSynapse(exBucket, exFolder, toLoadSynID, annotYAML, syn):
    '''Adds data from an external S3 bucket to Synapse.'''

    s3 = boto.connect_s3()
    H3bucket = s3.get_bucket(exBucket)
    rnaseqDir = H3bucket.list(prefix=exFolder)

    loadedEntities = getLoadedEntities(parent=toLoadSynID, syn=syn)
    BAMannotations = parseProjectAnnotations(yamlPath=annotYAML)
    ## TODO Add bucket and key to annotations
    addedCount = 0
    for key in rnaseqDir:
        if os.path.basename(key.name) in loadedEntities:
            print('Skipping %s because it is already loaded to Synapse.' %
                  os.path.basename(key.name))
        elif key.name.endswith('bam'):
            s3path = '/'.join(['http://s3.amazonaws.com', exBucket, key.name])
            BAMEntity = File(path=s3path,
                             name=os.path.basename(key.name),
                             description='BAM format aligned reads',
                             parent=toLoadSynID,
                             synapseStore=False,
                             annotations=BAMannotations)
            BAMEntity = syn.store(BAMEntity)
            addedCount += 1
    return (addedCount)
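
# A hypothetical invocation of the loader above; the bucket, prefix, Synapse
# ID, and YAML path are all placeholder values, and a logged-in `syn` client
# plus boto AWS credentials are assumed:
def _load_example_bucket(syn):
    return addExternalDataToSynapse(exBucket='example-h3-bucket',
                                    exFolder='rnaseq/',
                                    toLoadSynID='syn999',
                                    annotYAML='/path/to/annotations.yaml',
                                    syn=syn)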
def storeFile(syn,
              fileName,
              parentId,
              center,
              fileFormat,
              dataSubType,
              platform=None,
              cBioFileFormat=None,
              used=None):
    logger.info("STORING FILES")
    fileEnt = File(fileName, parent=parentId)
    fileEnt.center = center
    fileEnt.species = "Human"
    fileEnt.consortium = 'GENIE'
    fileEnt.dataType = "genomicVariants"
    fileEnt.fundingAgency = "AACR"
    fileEnt.assay = 'targetGeneSeq'
    fileEnt.fileFormat = fileFormat
    fileEnt.dataSubType = dataSubType
    fileEnt.fileStage = "staging"
    if platform is not None:
        fileEnt.platform = platform
    if cBioFileFormat is not None:
        fileEnt.cBioFileFormat = cBioFileFormat
    ent = syn.store(fileEnt, used=used)
    return (ent)
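
# A hypothetical call to storeFile above; the file path, parent ID, and
# center name are placeholder values:
def _store_example_file(syn):
    return storeFile(syn,
                     fileName='/path/to/data_mutations.txt',
                     parentId='syn999',
                     center='EXAMPLE-CENTER',
                     fileFormat='maf',
                     dataSubType='mutations')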
def test_synStore_sftpIntegration():
    """Creates a File Entity on an sftp server and add the external url. """
    filepath = utils.make_bogus_binary_file(1 * MB - 777771)
    try:
        file = syn.store(File(filepath, parent=project))
        file2 = syn.get(file)
        assert file.externalURL == file2.externalURL and urlparse(
            file2.externalURL).scheme == 'sftp'

        tmpdir = tempfile.mkdtemp()
        schedule_for_cleanup(tmpdir)

        ## test filename override
        file2.fileNameOverride = "whats_new_in_baltimore.data"
        file2 = syn.store(file2)
        ## TODO We haven't defined how filename override interacts with
        ## TODO previously cached files so, side-step that for now by
        ## TODO making sure the file is not in the cache!
        syn.cache.remove(file2.dataFileHandleId, delete=True)
        file3 = syn.get(file, downloadLocation=tmpdir)
        assert os.path.basename(file3.path) == file2.fileNameOverride

        ## test that we got an MD5 à la SYNPY-185
        assert_is_not_none(file3.md5)
        fh = syn._getFileHandle(file3.dataFileHandleId)
        assert_is_not_none(fh['contentMd5'])
        assert_equals(file3.md5, fh['contentMd5'])
    finally:
        try:
            os.remove(filepath)
        except Exception:
            print(traceback.format_exc())
def _manifest_upload(syn, df):
    items = []
    for i, row in df.iterrows():
        file = File(
            path=row['path'],
            parent=row['parent'],
            **{key: row[key]
               for key in FILE_CONSTRUCTOR_FIELDS if key in row},
        )

        annotations = dict(
            row.drop(FILE_CONSTRUCTOR_FIELDS + STORE_FUNCTION_FIELDS +
                     REQUIRED_FIELDS + PROVENANCE_FIELDS,
                     errors='ignore'))

        # if an item in the manifest upload is an empty string we do not want
        # to upload it as an empty string annotation
        file.annotations = {k: v for k, v in annotations.items() if v != ''}

        item = _SyncUploadItem(
            file,
            row['used'] if 'used' in row else [],
            row['executed'] if 'executed' in row else [],
            {key: row[key]
             for key in STORE_FUNCTION_FIELDS if key in row},
        )
        items.append(item)

    with _sync_executor(syn) as executor:
        uploader = _SyncUploader(syn, executor)
        uploader.upload(items)

    return True
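
# A minimal sketch of driving _manifest_upload: a pandas DataFrame with the
# required 'path' and 'parent' columns (the parent ID is a placeholder; any
# extra columns outside the FILE_CONSTRUCTOR/STORE_FUNCTION/PROVENANCE field
# sets become annotations):
def _upload_example_manifest(syn):
    import pandas as pd
    df = pd.DataFrame({
        'path': ['/tmp/a.txt', '/tmp/b.txt'],
        'parent': ['syn999', 'syn999'],
    })
    return _manifest_upload(syn, df)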
def test_download_check_md5(syn, project, schedule_for_cleanup):
    tempfile_path = utils.make_bogus_data_file()
    schedule_for_cleanup(tempfile_path)
    entity_bad_md5 = syn.store(File(path=tempfile_path, parent=project['id'], synapseStore=False))

    pytest.raises(SynapseMd5MismatchError, syn._download_from_URL, entity_bad_md5['externalURL'], tempfile.gettempdir(),
                  entity_bad_md5['dataFileHandleId'], expected_md5="2345a")
def test_syncFromSynapse__downloadFile_is_false(syn):
    """
    Verify that when downloadFile=False is passed, syncFromSynapse does not
    download the file to the client's local machine.
    """

    project = Project(name="the project", parent="whatever", id="syn123")
    file = File(name="a file", parent=project, id="syn456")
    folder = Folder(name="a folder", parent=project, id="syn789")

    entities = {
        file.id: file,
        folder.id: folder,
    }

    def syn_get_side_effect(entity, *args, **kwargs):
        return entities[id_of(entity)]

    with patch.object(syn, "getChildren", side_effect=[[folder, file], []]),\
            patch.object(syn, "get", side_effect=syn_get_side_effect) as patch_syn_get:

        synapseutils.syncFromSynapse(syn, project, downloadFile=False)
        patch_syn_get.assert_called_once_with(
            file['id'],
            downloadLocation=None,
            ifcollision='overwrite.local',
            followLink=False,
            downloadFile=False,
        )
def test_store__changing_from_Synapse_to_externalURL_by_changing_path(syn, project, schedule_for_cleanup):
    # create a temp file
    temp_path = utils.make_bogus_data_file()
    schedule_for_cleanup(temp_path)

    ext = syn.store(File(temp_path, parent=project, synapseStore=True))
    ext = syn.get(ext)
    assert "org.sagebionetworks.repo.model.file.S3FileHandle" == ext._file_handle.concreteType

    ext.synapseStore = False
    ext = syn.store(ext)

    # do a get to make sure filehandle has been updated correctly
    ext = syn.get(ext.id, downloadFile=True)
    assert "org.sagebionetworks.repo.model.file.ExternalFileHandle" == ext._file_handle.concreteType
    assert utils.as_url(temp_path) == ext.externalURL
    assert not ext.synapseStore

    # swap back to synapse storage
    ext.synapseStore = True
    ext = syn.store(ext)
    # do a get to make sure filehandle has been updated correctly
    ext = syn.get(ext.id, downloadFile=True)
    assert "org.sagebionetworks.repo.model.file.S3FileHandle" == ext._file_handle.concreteType
    assert ext.externalURL is None
    assert ext.synapseStore
def test_dispose(syn_client, syn_test_helper, new_temp_file):
    project = syn_client.store(Project(name=syn_test_helper.uniq_name()))

    folder = syn_client.store(
        Folder(name=syn_test_helper.uniq_name(prefix='Folder '),
               parent=project))

    file = syn_client.store(
        File(name=syn_test_helper.uniq_name(prefix='File '),
             path=new_temp_file,
             parent=folder))

    syn_objects = [project, folder, file]

    for syn_obj in syn_objects:
        syn_test_helper.dispose_of(syn_obj)
        assert syn_obj in syn_test_helper._trash

    syn_test_helper.dispose()
    assert len(syn_test_helper._trash) == 0

    for syn_obj in syn_objects:
        with pytest.raises(synapseclient.exceptions.SynapseHTTPError) as ex:
            syn_client.get(syn_obj, downloadFile=False)

        err_str = str(ex.value)
        assert "Not Found" in err_str or "cannot be found" in err_str or "is in trash can" in err_str or "does not exist" in err_str

    try:
        os.remove(new_temp_file)
    except OSError:
        pass
def upload_file(file_path: str, login: str, parent: str, description: str = None) -> None:
    """Uploads file to Synapse. Password must be stored in environment variable SYNAPSE_PASS"""
    description = '' if None else description
    f = File(file_path, description=description, parent=parent)

    syn = _syn_login(login)
    syn.store(f)
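
# Hypothetical usage of upload_file above; the path, login, and parent ID are
# placeholders, and SYNAPSE_PASS must be set in the environment for the
# assumed _syn_login helper:
def _upload_example():
    upload_file('/tmp/results.csv',
                login='me@example.org',
                parent='syn999',
                description='example upload')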
def test_store_file_handle_update_metadata():
    original_file_path = utils.make_bogus_data_file()
    schedule_for_cleanup(original_file_path)

    # store a file in the project
    entity = syn.store(File(original_file_path, parent=project))
    old_file_handle = entity._file_handle

    # create a file handle to replace the old one
    replacement_file_path = utils.make_bogus_data_file()
    schedule_for_cleanup(replacement_file_path)
    new_file_handle = syn.uploadFileHandle(replacement_file_path,
                                           parent=project)

    entity.dataFileHandleId = new_file_handle['id']
    new_entity = syn.store(entity)

    # make sure _file_handle info was changed (_file_handle values all change
    # at once, so verifying the id change is sufficient)
    assert_equal(new_file_handle['id'], new_entity._file_handle['id'])
    assert_not_equal(old_file_handle['id'], new_entity._file_handle['id'])

    # check that local_state was updated
    assert_equal(replacement_file_path, new_entity.path)
    assert_equal(os.path.dirname(replacement_file_path), new_entity.cacheDir)
    assert_equal([os.path.basename(replacement_file_path)], new_entity.files)
def test_resume_partial_download(syn, project, schedule_for_cleanup):
    original_file = utils.make_bogus_data_file(40000)

    entity = File(original_file, parent=project['id'])
    entity = syn.store(entity)

    # stash the original file for comparison later
    shutil.move(original_file, original_file+'.original')
    original_file += '.original'
    schedule_for_cleanup(original_file)

    temp_dir = tempfile.gettempdir()

    url = '%s/entity/%s/file' % (syn.repoEndpoint, entity.id)
    path = syn._download_from_URL(url, destination=temp_dir, fileHandleId=entity.dataFileHandleId,
                                  expected_md5=entity.md5)

    # simulate an incomplete download by putting the
    # complete file back into its temporary location
    tmp_path = utils.temp_download_filename(temp_dir, entity.dataFileHandleId)
    shutil.move(path, tmp_path)

    # ...and truncating it to some fraction of its original size
    with open(tmp_path, 'r+') as f:
        f.truncate(3*os.path.getsize(original_file)//7)

    # this should complete the partial download
    path = syn._download_from_URL(url, destination=temp_dir, fileHandleId=entity.dataFileHandleId,
                                  expected_md5=entity.md5)

    assert filecmp.cmp(original_file, path), "File comparison failed"