def uploadToSynapse(f):
    """Given a filepath extracts metadata and uploads to Synapse"""
    center, sample_id, workflow_name, date, call_type, dataType, fileType = ['']*7
    url = URLBASE+f
    if   'OICR_BL' in f: center = 'oicr_bl'
    elif 'CRG/clindel/somatic' in f:  center = 'crg_clindel'
    else: center = f.split('/')[4]
    filename =  f.split('/')[-1]
    if center in ('yale', 'wustl', 'LOHcomplete'):
        if filename =='bd829214-f230-4331-b234-def10bbe7938CNV.vcf.gz':
            sample_id, dataType, fileType='bd829214-f230-4331-b234-def10bbe7938', 'cnv', 'vcf'
        else:
            sample_id, dataType = filename.lower().split('.')[:2]
            fileType =  [i for i in filename.split('.')[2:] if i != 'gz'][-1]
    elif center in ('broad', 'BSC', 'oicr_sga', 'mda_kchen', 'MDA_HGSC', 'mcgill_popsv', 'sfu', 'UCSC', 'oicr_bl', 'Synteka_pgm21', 'crg_clindel'):
        sample_id, workflow_name, date, call_type, dataType  =  filename.replace('indels', 'indel', split('.')[:5])
        fileType =  [i for i in filename.split('.')[5:] if i != 'gz'][-1]
    else:
        print 'Not uploading:', f
        return 
    print center, workflow_name, date, call_type, dataType, fileType
    file = File(url, parentId=DIRS[center], synapseStore=False)
    file.center = center.lower()
    file.sample_id = sample_id
    file.workflow_name = workflow_name
    file.date = date
    file.call_type = call_type
    file.dataType = 'DNA'
    file.disease = 'Cancer'
    file.dataSubType = dataType
    file.fileType = fileType
    #file.analysis_id_tumor = ?????
    syn.store(file, forceVersion=False)
def test_Entity():
    # Test CRUD on Entity objects, Project, Folder, File with createEntity/getEntity/updateEntity
    project_name = str(uuid.uuid4())
    project = Project(project_name, description='Bogus testing project')
    project = syn.createEntity(project)
    schedule_for_cleanup(project)

    folder = Folder('Test Folder', parent=project, description='A place to put my junk', foo=1000)
    folder = syn.createEntity(folder)

    path = utils.make_bogus_data_file()
    schedule_for_cleanup(path)
    a_file = File(path, parent=folder, description='Random data for testing', foo='An arbitrary value', bar=[33,44,55], bday=Datetime(2013,3,15))
    a_file = syn._createFileEntity(a_file)

    ## local object state should be preserved
    assert a_file.path == path

    ## check the project entity
    project = syn.getEntity(project)
    assert project.name == project_name

    ## check the folder entity
    folder = syn.getEntity(folder.id)
    assert folder.name == 'Test Folder'
    assert folder.parentId == project.id
    assert folder.foo[0] == 1000

    ## check the file entity
    a_file = syn.getEntity(a_file)
    assert a_file['foo'][0] == 'An arbitrary value'
    assert a_file['bar'] == [33,44,55]
    assert a_file['bday'][0] == Datetime(2013,3,15)

    ## make sure file comes back intact
    a_file = syn.downloadEntity(a_file)
    assert filecmp.cmp(path, a_file.path)

    #TODO We're forgotten the local file path
    a_file.path = path

    ## update the file entity
    a_file['foo'] = 'Another arbitrary chunk of text data'
    a_file['new_key'] = 'A newly created value'
    a_file = syn.updateEntity(a_file)
    assert a_file['foo'][0] == 'Another arbitrary chunk of text data'
    assert a_file['bar'] == [33,44,55]
    assert a_file['bday'][0] == Datetime(2013,3,15)
    assert a_file.new_key[0] == 'A newly created value'
    assert a_file.path == path

    ## upload a new file
    new_path = utils.make_bogus_data_file()
    schedule_for_cleanup(new_path)

    a_file = syn.uploadFile(a_file, new_path)

    ## make sure file comes back intact
    a_file = syn.downloadEntity(a_file)
    assert filecmp.cmp(new_path, a_file.path)
def test_round_trip():
    fh = None
    filepath = utils.make_bogus_binary_file(6*MB + 777771, verbose=True)
    print 'Made bogus file: ', filepath
    try:
        fh = syn._chunkedUploadFile(filepath, verbose=False)
        # print 'FileHandle:'
        # syn.printEntity(fh)

        # Download the file and compare it with the original
        junk = File(filepath, parent=project, dataFileHandleId=fh['id'])
        junk.properties.update(syn._createEntity(junk.properties))
        junk.update(syn._downloadFileEntity(junk, filepath))
        assert filecmp.cmp(filepath, junk.path)

    finally:
        try:
            if 'junk' in locals():
                syn.delete(junk)
        except Exception:
            print traceback.format_exc()
        try:
            os.remove(filepath)
        except Exception:
            print traceback.format_exc()
        if fh:
            # print 'Deleting fileHandle', fh['id']
            syn._deleteFileHandle(fh)
def test_uploadFileEntity(syn, project, schedule_for_cleanup):
    # Create a FileEntity
    # Dictionaries default to FileEntity as a type
    fname = utils.make_bogus_data_file()
    schedule_for_cleanup(fname)
    entity = File(name='fooUploadFileEntity',
                  path=fname,
                  parentId=project['id'],
                  description='A test file entity')
    entity = syn.store(entity)

    # Download and verify
    entity = syn.get(entity)

    assert entity['files'][0] == os.path.basename(fname)
    assert filecmp.cmp(fname, entity['path'])

    # Check if we upload the wrong type of file handle
    fh = syn.restGET('/entity/%s/filehandles' % entity.id)['list'][0]
    assert fh[
        'concreteType'] == 'org.sagebionetworks.repo.model.file.S3FileHandle'

    # Create a different temporary file
    fname = utils.make_bogus_data_file()
    schedule_for_cleanup(fname)

    # Update existing FileEntity
    entity.path = fname
    entity = syn.store(entity)

    # Download and verify that it is the same file
    entity = syn.get(entity)
    assert entity['files'][0] == os.path.basename(fname)
    assert filecmp.cmp(fname, entity['path'])
def test_store_activity():
    # Create a File and an Activity
    path = utils.make_bogus_binary_file()
    schedule_for_cleanup(path)
    entity = File(path, name='Hinkle horn honking holes', parent=project)
    honking = Activity(name='Hinkle horn honking', 
                       description='Nettlebed Cave is a limestone cave located on the South Island of New Zealand.')
    honking.used('http://www.flickr.com/photos/bevanbfree/3482259379/')
    honking.used('http://www.flickr.com/photos/bevanbfree/3482185673/')

    # This doesn't set the ID of the Activity
    entity = syn.store(entity, activity=honking)

    # But this does
    honking = syn.getProvenance(entity.id)

    # Verify the Activity
    assert honking['name'] == 'Hinkle horn honking'
    assert len(honking['used']) == 2
    assert honking['used'][0]['concreteType'] == 'org.sagebionetworks.repo.model.provenance.UsedURL'
    assert honking['used'][0]['wasExecuted'] == False
    assert honking['used'][0]['url'].startswith('http://www.flickr.com/photos/bevanbfree/3482')
    assert honking['used'][1]['concreteType'] == 'org.sagebionetworks.repo.model.provenance.UsedURL'
    assert honking['used'][1]['wasExecuted'] == False

    # Store another Entity with the same Activity
    entity = File('http://en.wikipedia.org/wiki/File:Nettlebed_cave.jpg', 
                  name='Nettlebed Cave', parent=project, synapseStore=False)
    entity = syn.store(entity, activity=honking)

    # The Activities should match
    honking2 = syn.getProvenance(entity)
    assert honking['id'] == honking2['id']
def test_synapseStore_flag():
    # Store a path to a local file
    path = utils.make_bogus_data_file()
    schedule_for_cleanup(path)
    bogus = File(path, name='Totally bogus data', parent=project, synapseStore=False)
    bogus = syn.store(bogus)
    
    # Verify the thing can be downloaded as a URL
    bogus = syn.get(bogus, downloadFile=False)
    assert bogus.name == 'Totally bogus data'
    assert bogus.path == path, "Path: %s\nExpected: %s" % (bogus.path, path)
    assert bogus.synapseStore == False

    # Make sure the test runs on Windows and other OS's
    if path[0].isalpha() and path[1]==':':
        # A Windows file URL looks like this: file:///c:/foo/bar/bat.txt
        expected_url = 'file:///' + path.replace("\\","/")
    else:
        expected_url = 'file://' + path

    assert bogus.externalURL == expected_url, 'URL: %s\nExpected %s' % (bogus.externalURL, expected_url)

    # A file path that doesn't exist should still work
    bogus = File('/path/to/local/file1.xyz', parentId=project.id, synapseStore=False)
    bogus = syn.store(bogus)
    assert_raises(IOError, syn.get, bogus)
    assert bogus.synapseStore == False

    # Try a URL
    bogus = File('http://dev-versions.synapse.sagebase.org/synapsePythonClient', parent=project, synapseStore=False)
    bogus = syn.store(bogus)
    bogus = syn.get(bogus)
    assert bogus.synapseStore == False
Beispiel #7
0
def test_syncFromSynapse():
    """This function tests recursive download as defined in syncFromSynapse
    most of the functionality of this function are already tested in the 
    tests/integration/test_command_line_client::test_command_get_recursive_and_query

    which means that the only test if for path=None
    """
    # Create a Project
    project_entity = syn.store(synapseclient.Project(name=str(uuid.uuid4())))
    schedule_for_cleanup(project_entity.id)

    # Create a Folder in Project
    folder_entity = syn.store(
        Folder(name=str(uuid.uuid4()), parent=project_entity))

    # Create and upload two files in Folder
    uploaded_paths = []
    for i in range(2):
        f = utils.make_bogus_data_file()
        uploaded_paths.append(f)
        schedule_for_cleanup(f)
        syn.store(File(f, parent=folder_entity))
    # Add a file in the project level as well
    f = utils.make_bogus_data_file()
    uploaded_paths.append(f)
    schedule_for_cleanup(f)
    syn.store(File(f, parent=project_entity))

    # Test recursive get
    output = synapseutils.syncFromSynapse(syn, project_entity)

    assert_equals(len(output), len(uploaded_paths))
    for f in output:
        assert_in(f.path, uploaded_paths)
def test_upload_string():
    ## This tests the utility that uploads  a _string_ rather than
    ## a file on disk, to S3.

    fh = None
    content = "My dog has fleas.\n"
    f = tempfile.NamedTemporaryFile(suffix=".txt", delete=False)
    f.write(content)
    f.close()
    filepath = f.name

    print 'Made bogus file: ', filepath
    try:
        fh = syn._uploadStringToFile(content)
        # print 'FileHandle:'
        # syn.printEntity(fh)

        # Download the file and compare it with the original
        junk = File(filepath, parent=project, dataFileHandleId=fh['id'])
        junk.properties.update(syn._createEntity(junk.properties))
        junk.update(syn._downloadFileEntity(junk, filepath))
        assert filecmp.cmp(filepath, junk.path)

    finally:
        try:
            if 'junk' in locals():
                syn.delete(junk)
        except Exception:
            print traceback.format_exc()
        try:
            os.remove(filepath)
        except Exception:
            print traceback.format_exc()
def test_get_local_file():
    """Tests synapse.get() with local a local file """
    new_path = utils.make_bogus_data_file()
    schedule_for_cleanup(new_path)
    folder = Folder('TestFindFileFolder', parent=project, description='A place to put my junk')
    folder = syn.createEntity(folder)

    #Get an nonexistent file in Synapse
    assert_raises(SynapseError, syn.get, new_path)

    #Get a file really stored in Synapse
    ent_folder = syn.store(File(new_path, parent=folder))
    ent2 = syn.get(new_path)
    assert ent_folder.id==ent2.id and ent_folder.versionNumber==ent2.versionNumber

    #Get a file stored in Multiple locations #should display warning
    ent = syn.store(File(new_path, parent=project))
    ent = syn.get(new_path)

    #Get a file stored in multiple locations with limit set
    ent = syn.get(new_path, limitSearch=folder.id)
    assert ent.id == ent_folder.id and ent.versionNumber==ent_folder.versionNumber

    #Get a file that exists but such that limitSearch removes them and raises error
    assert_raises(SynapseError, syn.get, new_path, limitSearch='syn1')
def test_getWithEntityBundle(*mocks):
    mocks = [item for item in mocks]
    is_loco_mock              = mocks.pop()
    cache_location_guess_mock = mocks.pop()
    download_file_mock        = mocks.pop()
    
    # -- Change downloadLocation but do not download more than once --
    is_loco_mock.return_value = False
    
    bundle = {"entity"     : {"name": "anonymous", 
                              "dataFileHandleId": "-1337", 
                              "concreteType": "org.sagebionetworks.repo.model.FileEntity",
                              "parentId": "syn12345"},
              "fileHandles": [{u'concreteType': u'org.sagebionetworks.repo.model.file.S3FileHandle',
                               u'fileName': u'anonymous',
                               u'contentMd5': u'1698d26000d60816caab15169efcd23a',
                               u'id': u'-1337'}],
              "annotations": {}}

    # Make the cache point to some temporary location
    cacheDir = synapseclient.cache.determine_cache_directory(bundle['entity'])
    
    # Pretend that the file is downloaded by the first call to syn._downloadFileEntity
    # The temp file should be added to the cache by the first syn._getWithEntityBundle() call
    f, cachedFile = tempfile.mkstemp()
    os.close(f)
    defaultLocation = os.path.join(cacheDir, bundle['entity']['name'])
    cache_location_guess_mock.return_value = (cacheDir, defaultLocation, cachedFile)
    
    # Make sure the Entity is updated with the cached file path
    def _downloadFileEntity(entity, path, submission):
        # We're disabling the download, but the given path should be within the cache
        assert path == defaultLocation
        return {"path": cachedFile}
    download_file_mock.side_effect = _downloadFileEntity

    # Make sure the cache does not already exist
    cacheMap = os.path.join(cacheDir, '.cacheMap')
    if os.path.exists(cacheMap):
        os.remove(cacheMap)
    
    syn._getWithEntityBundle(entityBundle=bundle, entity=None, downloadLocation=cacheDir, ifcollision="overwrite.local")
    syn._getWithEntityBundle(entityBundle=bundle, entity=None, ifcollision="overwrite.local")
    e = syn._getWithEntityBundle(entityBundle=bundle, entity=None, downloadLocation=cacheDir, ifcollision="overwrite.local")
    assert download_file_mock.call_count == 1

    assert e.name == bundle["entity"]["name"]
    assert e.parentId == bundle["entity"]["parentId"]
    assert e.cacheDir == cacheDir
    assert bundle['entity']['name'] in e.files
    assert e.path == os.path.join(cacheDir, bundle["entity"]["name"])

    ## test preservation of local state
    url = 'http://foo.com/secretstuff.txt'
    e = File(name='anonymous', parentId="syn12345", synapseStore=False, externalURL=url)
    e.local_state({'zap':'pow'})
    e = syn._getWithEntityBundle(entityBundle=bundle, entity=e)
    assert e.local_state()['zap'] == 'pow'
    assert e.synapseStore == False
    assert e.externalURL == url
def test_store_with_create_or_update_flag():
    project = create_project()

    filepath = utils.make_bogus_binary_file()
    bogus1 = File(filepath, name='Bogus Test File', parent=project)

    bogus1 = syn.store(bogus1, createOrUpdate=True)

    # Create a different file with the same name and parent
    new_filepath = utils.make_bogus_binary_file()
    bogus1.path = new_filepath

    # Expected behavior is that a new version of the first File will be created
    bogus2 = syn.store(bogus1, createOrUpdate=True)

    assert bogus2.id == bogus1.id
    assert bogus2.versionNumber == 2
    assert not filecmp.cmp(bogus2.path, filepath)

    bogus2a = syn.get(bogus2.id)
    assert bogus2a.id == bogus1.id
    assert bogus2a.versionNumber == 2
    assert filecmp.cmp(bogus2.path, bogus2a.path)

    # Create yet another file with the same name and parent
    newer_filepath = utils.make_bogus_binary_file()
    bogus3 = File(newer_filepath, name='Bogus Test File', parent=project)

    # Expected behavior is raising an exception with a 409 error
    assert_raises(requests.exceptions.HTTPError, syn.store, bogus3, createOrUpdate=False)
Beispiel #12
0
def upload(args,syn):
	if args.dataType == "rnaseq":
		parentId = "syn6034916"
		pipeline = "syn6126122"
		dataType = "RNASeq"
	elif args.dataType == "dnaseq":
		parentId = "syn6034751"
		pipeline = "syn6126123"
		dataType = "TargDNASeq"
	elif args.dataType == "snparray":
		parentId = "syn6038475"
		pipeline = "syn6126121"
		dataType = "SNParray"
	elif args.dataType == "exparray":
		parentId = "syn6038915"
		pipeline = "syn6126120"
		dataType = "expression_microarray"
	elif args.dataType == "exome":
		parentId = "syn6115597"
		dataType = "exome"
		pipeline = ""
	else:
		raise ValueError("dataType needs to be rnaseq/dnaseq/snparray/exparray/exome")
	if args.workflow is not None:
		workflow = syn.get(pipeline,downloadFile=False)
		workflow.path = args.workflow
		workflow.name = os.path.basename(args.workflow)
		workflow = syn.store(workflow)
		pipeline = workflow.id
	fileEnt = File(args.input,parent=parentId)
	#fileEnt.annotations = temp.to_dict('index').values()[0]
	fileEnt.dataType = dataType
	fileEnt.sampleId = sampleId
	fileEnt = syn.store(fileEnt,used = pipeline)
	return(fileEnt.id)
    def test_upload__error(self, syn):
        """Verify that if an item upload fails the error is raised in the main thread
        and any running Futures are cancelled"""

        item_1 = _SyncUploadItem(File(path='/tmp/foo', parentId='syn123'), [],
                                 [], {})
        item_2 = _SyncUploadItem(File(path='/tmp/bar', parentId='syn123'), [],
                                 [], {})
        items = [item_1, item_2]

        def syn_store_side_effect(entity, *args, **kwargs):
            if entity.path == entity.path:
                raise ValueError()
            return Mock()

        uploader = _SyncUploader(syn, get_executor())
        original_abort = uploader._abort

        def abort_side_effect(futures):
            return original_abort(futures)

        with patch.object(syn, 'store') as mock_syn_store, \
                patch.object(uploader, '_abort') as mock_abort:

            mock_syn_store.side_effect = syn_store_side_effect
            mock_abort.side_effect = abort_side_effect
            with pytest.raises(ValueError):
                uploader.upload(items)

            # it would be aborted with Futures
            mock_abort.assert_called_once_with([ANY])
            isinstance(mock_abort.call_args_list[0][0], Future)
def test_ftp_download():
    """Test downloading an Entity that points to a file on an FTP server. """

    # Another test with an external reference. This is because we only need to test FTP download; not upload. Also so we don't have to maintain an FTP server just for this purpose.
    # Make an entity that points to an FTP server file.
    entity = File(parent=project['id'], name='1KB.zip')
    fileHandle = {}
    fileHandle['externalURL'] = 'ftp://speedtest.tele2.net/1KB.zip'
    fileHandle["fileName"] = entity.name
    fileHandle["contentType"] = "application/zip"
    fileHandle["contentMd5"] = '0f343b0931126a20f133d67c2b018a3b'
    fileHandle["contentSize"] = 1024
    fileHandle[
        "concreteType"] = "org.sagebionetworks.repo.model.file.ExternalFileHandle"
    fileHandle = syn.restPOST('/externalFileHandle', json.dumps(fileHandle),
                              syn.fileHandleEndpoint)
    entity.dataFileHandleId = fileHandle['id']
    entity = syn.store(entity)

    # Download the entity and check that MD5 matches expected
    FTPfile = syn.get(entity.id,
                      downloadLocation=os.getcwd(),
                      downloadFile=True)
    assert FTPfile.md5 == utils.md5_for_file(FTPfile.path).hexdigest()
    schedule_for_cleanup(entity)
    os.remove(FTPfile.path)
def test_get_and_store_by_name_and_parent_id():
    project = create_project()

    path = utils.make_bogus_data_file()
    schedule_for_cleanup(path)

    f = File(path, name='Foobarbat', parent=project)
    f2 = syn.store(f)
    f = syn.get(f)

    assert f.id == f2.id
    assert f.name == f2.name
    assert f.parentId == f2.parentId

    ## new file
    path = utils.make_bogus_data_file()
    schedule_for_cleanup(path)

    ## should create a new version of the previous File entity
    f3 = File(path,
              name='Foobarbat',
              parent=project,
              description='banana',
              junk=1234)
    f3 = syn.store(f3)

    ## should be an update of the existing entity with the same name and parent
    assert f3.id == f.id
    assert f3.description == 'banana'
    assert f3.junk == [1234]
    assert filecmp.cmp(path, f3.path)
def test_extract_file_entity_metadata__ensure_correct_row_metadata(syn):
    # Test for SYNPY-692, where 'contentType' was incorrectly set on all rows except for the very first row.

    # create 2 file entities with different metadata
    entity1 = File(parent='syn123',
                   id='syn456',
                   contentType='text/json',
                   path='path1',
                   name='entity1',
                   synapseStore=True)
    entity2 = File(parent='syn789',
                   id='syn890',
                   contentType='text/html',
                   path='path2',
                   name='entity2',
                   synapseStore=False)
    files = [entity1, entity2]

    # we don't care about provenance metadata in this case
    with patch.object(synapseutils.sync,
                      "_get_file_entity_provenance_dict",
                      return_value={}):
        # method under test
        keys, data = synapseutils.sync._extract_file_entity_metadata(
            syn, files)

    # compare source entity metadata gainst the extracted metadata
    for file_entity, file_row_data in zip(files, data):
        for key in keys:
            if key == 'parent':  # workaroundd for parent/parentId inconsistency. (SYNPY-697)
                assert file_entity.get('parentId') == file_row_data.get(key)
            else:
                assert file_entity.get(key) == file_row_data.get(key)
Beispiel #17
0
def _manifest_upload(syn, df):
    items = []
    for i, row in df.iterrows():
        file = File(
            path=row['path'],
            parent=row['parent'],
            **{key: row[key]
               for key in FILE_CONSTRUCTOR_FIELDS if key in row},
        )
        file.annotations = dict(
            row.drop(FILE_CONSTRUCTOR_FIELDS + STORE_FUNCTION_FIELDS +
                     REQUIRED_FIELDS + PROVENANCE_FIELDS,
                     errors='ignore'))

        item = _SyncUploadItem(
            file,
            row['used'] if 'used' in row else [],
            row['executed'] if 'executed' in row else [],
            {key: row[key]
             for key in STORE_FUNCTION_FIELDS if key in row},
        )
        items.append(item)

    with _sync_executor(syn) as executor:
        uploader = _SyncUploader(syn, executor)
        uploader.upload(items)

    return True
def test_upload_string():
    ## This tests the utility that uploads  a _string_ rather than 
    ## a file on disk, to S3.
    
    fh = None
    content = "My dog has fleas.\n"
    f = tempfile.NamedTemporaryFile(suffix=".txt", delete=False)
    f.write(content)
    f.close()
    filepath=f.name
        
    print 'Made bogus file: ', filepath
    try:
        fh = syn._uploadStringToFile(content)
        # print 'FileHandle:'
        # syn.printEntity(fh)

        # Download the file and compare it with the original
        junk = File(filepath, parent=project, dataFileHandleId=fh['id'])
        junk.properties.update(syn._createEntity(junk.properties))
        junk.update(syn._downloadFileEntity(junk, filepath))
        assert filecmp.cmp(filepath, junk.path)

    finally:
        try:
            if 'junk' in locals():
                syn.delete(junk)
        except Exception:
            print traceback.format_exc()
        try:
            os.remove(filepath)
        except Exception:
            print traceback.format_exc()
Beispiel #19
0
def _manifest_upload(syn, df):
    items = []
    for i, row in df.iterrows():
        file = File(
            path=row['path'],
            parent=row['parent'],
            **{key: row[key]
               for key in FILE_CONSTRUCTOR_FIELDS if key in row},
        )

        annotations = dict(
            row.drop(FILE_CONSTRUCTOR_FIELDS + STORE_FUNCTION_FIELDS +
                     REQUIRED_FIELDS + PROVENANCE_FIELDS,
                     errors='ignore'))

        # if a item in the manifest upload is an empty string we do not want to upload that
        # as an empty string annotation
        file.annotations = {k: v for k, v in annotations.items() if v != ''}

        item = _SyncUploadItem(
            file,
            row['used'] if 'used' in row else [],
            row['executed'] if 'executed' in row else [],
            {key: row[key]
             for key in STORE_FUNCTION_FIELDS if key in row},
        )
        items.append(item)

    with _sync_executor(syn) as executor:
        uploader = _SyncUploader(syn, executor)
        uploader.upload(items)

    return True
def test_round_trip():
    fh = None
    filepath = utils.make_bogus_binary_file(6 * MB + 777771)
    print 'Made bogus file: ', filepath
    try:
        fh = syn._chunkedUploadFile(filepath)
        # print 'FileHandle:'
        # syn.printEntity(fh)

        # Download the file and compare it with the original
        junk = File(filepath, parent=project, dataFileHandleId=fh['id'])
        junk.properties.update(syn._createEntity(junk.properties))
        junk.update(syn._downloadFileEntity(junk, filepath))
        assert filecmp.cmp(filepath, junk.path)

    finally:
        try:
            if 'junk' in locals():
                syn.delete(junk)
        except Exception:
            print traceback.format_exc()
        try:
            os.remove(filepath)
        except Exception:
            print traceback.format_exc()
def test_multipart_upload_big_string():
    cities = ["Seattle", "Portland", "Vancouver", "Victoria",
              "San Francisco", "Los Angeles", "New York",
              "Oaxaca", "Cancún", "Curaçao", "जोधपुर",
              "অসম", "ལྷ་ས།", "ཐིམ་ཕུ་", "دبي", "አዲስ አበባ",
              "São Paulo", "Buenos Aires", "Cartagena",
              "Amsterdam", "Venice", "Rome", "Dubrovnik",
              "Sarajevo", "Madrid", "Barcelona", "Paris",
              "Αθήνα", "Ρόδος", "København", "Zürich",
              "金沢市", "서울", "แม่ฮ่องสอน", "Москва"]

    text = "Places I wanna go:\n"
    while len(text.encode('utf-8')) < multipart_upload_module.MIN_PART_SIZE:
        text += ", ".join( random.choice(cities) for i in range(5000) ) + "\n"

    fhid = multipart_upload_string(syn, text)
    print('FileHandle: {fhid}'.format(fhid=fhid))

    # Download the file and compare it with the original
    junk = File("message.txt", parent=project, dataFileHandleId=fhid)
    junk.properties.update(syn._createEntity(junk.properties))
    (tmp_f, tmp_path) = tempfile.mkstemp()
    schedule_for_cleanup(tmp_path)
    junk.update(syn._downloadFileEntity(junk, tmp_path))

    with open(junk.path, encoding='utf-8') as f:
        retrieved_text = f.read()

    assert retrieved_text == text
def test_round_trip():
    fhid = None
    filepath = utils.make_bogus_binary_file(multipart_upload_module.MIN_PART_SIZE + 777771)
    print('Made bogus file: ', filepath)
    try:
        fhid = multipart_upload(syn, filepath)
        print('FileHandle: {fhid}'.format(fhid=fhid))

        # Download the file and compare it with the original
        junk = File(filepath, parent=project, dataFileHandleId=fhid)
        junk.properties.update(syn._createEntity(junk.properties))
        (tmp_f, tmp_path) = tempfile.mkstemp()
        schedule_for_cleanup(tmp_path)
        junk.update(syn._downloadFileEntity(junk, tmp_path))
        assert filecmp.cmp(filepath, junk.path)

    finally:
        try:
            if 'junk' in locals():
                syn.delete(junk)
        except Exception:
            print(traceback.format_exc())
        try:
            os.remove(filepath)
        except Exception:
            print(traceback.format_exc())
def test_store_with_create_or_update_flag():
    project = create_project()

    filepath = utils.make_bogus_binary_file()
    bogus1 = File(filepath, name='Bogus Test File', parent=project)

    bogus1 = syn.store(bogus1, createOrUpdate=True)

    # Create a different file with the same name and parent
    new_filepath = utils.make_bogus_binary_file()
    bogus1.path = new_filepath

    # Expected behavior is that a new version of the first File will be created
    bogus2 = syn.store(bogus1, createOrUpdate=True)

    assert bogus2.id == bogus1.id
    assert bogus2.versionNumber == 2
    assert not filecmp.cmp(bogus2.path, filepath)

    bogus2a = syn.get(bogus2.id)
    assert bogus2a.id == bogus1.id
    assert bogus2a.versionNumber == 2
    assert filecmp.cmp(bogus2.path, bogus2a.path)

    # Create yet another file with the same name and parent
    newer_filepath = utils.make_bogus_binary_file()
    bogus3 = File(newer_filepath, name='Bogus Test File', parent=project)

    # Expected behavior is raising an exception with a 409 error
    assert_raises(requests.exceptions.HTTPError,
                  syn.store,
                  bogus3,
                  createOrUpdate=False)
def test_download_file_URL_false():
    # Upload an external file handle
    fileThatExists = 'http://dev-versions.synapse.sagebase.org/synapsePythonClient'
    reupload = File(fileThatExists, synapseStore=False, parent=project)
    reupload = syn.store(reupload)
    reupload = syn.get(reupload, downloadFile=False)
    originalVersion = reupload.versionNumber

    # Reupload and check that the URL and version does not get mangled
    reupload = syn.store(reupload, forceVersion=False)
    assert_equals(reupload.path, fileThatExists,
                  "Entity should still be pointing at a URL")
    assert_equals(originalVersion, reupload.versionNumber)

    # Try a URL with an extra slash at the end
    fileThatDoesntExist = 'http://dev-versions.synapse.sagebase.org/synapsePythonClient/'
    reupload.synapseStore = False
    reupload.path = fileThatDoesntExist
    reupload = syn.store(reupload)
    reupload = syn.get(reupload, downloadFile=False)
    originalVersion = reupload.versionNumber

    reupload = syn.store(reupload, forceVersion=False)
    assert_equals(reupload.path, fileThatDoesntExist,
                  "Entity should still be pointing at a URL")
    assert_equals(originalVersion, reupload.versionNumber)
    def _view_setup(cls):
        # set up a file view
        folder = syn.store(
            Folder(name="PartialRowTestFolder" + str(uuid.uuid4()),
                   parent=project))
        syn.store(
            File("~/path/doesnt/matter",
                 name="f1",
                 parent=folder,
                 synapseStore=False))
        syn.store(
            File("~/path/doesnt/matter/again",
                 name="f2",
                 parent=folder,
                 synapseStore=False))

        cols = [
            Column(name='foo', columnType='STRING', maximumSize=1000),
            Column(name='bar', columnType='STRING')
        ]
        return syn.store(
            EntityViewSchema(name='PartialRowTestViews' + str(uuid.uuid4()),
                             columns=cols,
                             addDefaultViewColumns=False,
                             parent=project,
                             scopes=[folder]))
def test_getChildren(syn, schedule_for_cleanup):
    # setup a hierarchy for folders
    # PROJECT
    # |     \
    # File   Folder
    #           |
    #         File
    project_name = str(uuid.uuid1())
    test_project = syn.store(Project(name=project_name))
    folder = syn.store(Folder(name="firstFolder", parent=test_project))
    syn.store(
        File(path="~/doesntMatter.txt",
             name="file inside folders",
             parent=folder,
             synapseStore=False))
    project_file = syn.store(
        File(path="~/doesntMatterAgain.txt",
             name="file inside project",
             parent=test_project,
             synapseStore=False))
    schedule_for_cleanup(test_project)

    expected_id_set = {project_file.id, folder.id}
    children_id_set = {x['id'] for x in syn.getChildren(test_project.id)}
    assert expected_id_set == children_id_set
def test_syncFromSynapse__manifest_is_root(
        mock__get_file_entity_provenance_dict, mock_generateManifest, syn):
    """
    Verify manifest argument equal to "root" that pass in to syncFromSynapse, it will create root_manifest file only.
    """

    project = Project(name="the project", parent="whatever", id="syn123")
    file1 = File(name="a file", parent=project, id="syn456")
    folder = Folder(name="a folder", parent=project, id="syn789")
    file2 = File(name="a file2", parent=folder, id="syn789123")

    # Structure of nested project
    # project
    #    |---> file1
    #    |---> folder
    #             |---> file2

    entities = {
        file1.id: file1,
        folder.id: folder,
        file2.id: file2,
    }

    def syn_get_side_effect(entity, *args, **kwargs):
        return entities[id_of(entity)]

    mock__get_file_entity_provenance_dict.return_value = {}

    with patch.object(syn, "getChildren", side_effect=[[folder, file1], [file2]]),\
            patch.object(syn, "get", side_effect=syn_get_side_effect) as patch_syn_get:

        synapseutils.syncFromSynapse(syn,
                                     project,
                                     path="./",
                                     downloadFile=False,
                                     manifest="root")
        assert patch_syn_get.call_args_list == [
            call(
                file1['id'],
                downloadLocation="./",
                ifcollision='overwrite.local',
                followLink=False,
                downloadFile=False,
            ),
            call(
                file2['id'],
                downloadLocation="./a folder",
                ifcollision='overwrite.local',
                followLink=False,
                downloadFile=False,
            )
        ]

        assert mock_generateManifest.call_count == 1

        call_files = mock_generateManifest.call_args_list[0][0][1]
        assert len(call_files) == 2
        assert call_files[0].id == "syn456"
        assert call_files[1].id == "syn789123"
Beispiel #28
0
def test_walk():
    walked = []
    firstfile = utils.make_bogus_data_file()
    schedule_for_cleanup(firstfile)
    project_entity = syn.store(Project(name=str(uuid.uuid4())))
    schedule_for_cleanup(project_entity.id)
    folder_entity = syn.store(
        Folder(name=str(uuid.uuid4()), parent=project_entity))
    schedule_for_cleanup(folder_entity.id)
    second_folder = syn.store(
        Folder(name=str(uuid.uuid4()), parent=project_entity))
    schedule_for_cleanup(second_folder.id)
    file_entity = syn.store(File(firstfile, parent=project_entity))
    schedule_for_cleanup(file_entity.id)

    walked.append(((project_entity.name, project_entity.id), [
        (folder_entity.name, folder_entity.id),
        (second_folder.name, second_folder.id)
    ], [(file_entity.name, file_entity.id)]))

    nested_folder = syn.store(
        Folder(name=str(uuid.uuid4()), parent=folder_entity))
    schedule_for_cleanup(nested_folder.id)
    secondfile = utils.make_bogus_data_file()
    schedule_for_cleanup(secondfile)
    second_file = syn.store(File(secondfile, parent=nested_folder))
    schedule_for_cleanup(second_file.id)
    thirdfile = utils.make_bogus_data_file()
    schedule_for_cleanup(thirdfile)
    third_file = syn.store(File(thirdfile, parent=second_folder))
    schedule_for_cleanup(third_file.id)

    walked.append(((os.path.join(project_entity.name,
                                 folder_entity.name), folder_entity.id),
                   [(nested_folder.name, nested_folder.id)], []))
    walked.append(
        ((os.path.join(os.path.join(project_entity.name, folder_entity.name),
                       nested_folder.name), nested_folder.id), [],
         [(second_file.name, second_file.id)]))
    walked.append(((os.path.join(project_entity.name, second_folder.name),
                    second_folder.id), [], [(third_file.name, third_file.id)]))

    temp = synapseutils.walk(syn, project_entity.id)
    temp = list(temp)
    #Must sort the tuples returned, because order matters for the assert
    #Folders are returned in a different ordering depending on the name
    for i in walked:
        for x in i:
            if type(x) == list:
                x = x.sort()
    for i in temp:
        for x in i:
            if type(x) == list:
                x = x.sort()
        assert i in walked

    print("CHECK: synapseutils.walk on a file should return empty generator")
    temp = synapseutils.walk(syn, second_file.id)
    assert list(temp) == []
def test_File_update_file_handle__External_non_sftp():
    external_file_handle = {
        'concreteType':
        'org.sagebionetworks.repo.model.file.ExternalFileHandle',
        'externalURL': "https://some.website"
    }
    f = File(parent="idk")
    assert f.synapseStore
    f._update_file_handle(external_file_handle)
    assert not f.synapseStore
def test_download_file_entity__correct_local_state(syn):
    mock_cache_path = utils.normalize_path("/i/will/show/you/the/path/yi.txt")
    file_entity = File(parentId="syn123")
    file_entity.dataFileHandleId = 123
    with patch.object(syn.cache, 'get', return_value=mock_cache_path):
        syn._download_file_entity(downloadLocation=None, entity=file_entity, ifcollision="overwrite.local",
                                  submission=None)
        assert mock_cache_path == file_entity.path
        assert os.path.dirname(mock_cache_path) == file_entity.cacheDir
        assert 1 == len(file_entity.files)
        assert os.path.basename(mock_cache_path) == file_entity.files[0]
Beispiel #31
0
def _manifest_upload(syn, df):
    for i, row in df.iterrows():
        #Todo extract known constructor variables
        kwargs = {key: row[key] for key in FILE_CONSTRUCTOR_FIELDS if key in row }
        entity = File(row['path'], parent=row['parent'], **kwargs)
        entity.annotations = dict(row.drop(FILE_CONSTRUCTOR_FIELDS+STORE_FUNCTION_FIELDS+REQUIRED_FIELDS, errors = 'ignore'))

        #Update provenance list again to replace all file references that were uploaded
        if 'used' in row:
            row['used'] = syn._convertProvenanceList(row['used'])
        if 'executed' in row:
            row['executed'] = syn._convertProvenanceList(row['executed'])
        kwargs = {key: row[key] for key in STORE_FUNCTION_FIELDS if key in row}
        entity = syn.store(entity, **kwargs)
    return True
def test_synapseStore_flag():
    """Test storing entities while setting the synapseStore flag to False"""
    project = create_project()

    ## store a path to a local file (synapseStore=False)
    path = utils.make_bogus_data_file()
    schedule_for_cleanup(path)
    f1 = File(path,
              name='Totally bogus data',
              parent=project,
              synapseStore=False)

    f1 = syn.store(f1)

    f1a = syn.get(f1.id, downloadFile=False)

    assert f1a.name == 'Totally bogus data'
    assert f1a.path == path, 'path=' + str(f1a.path) + '; expected=' + path
    assert f1a.synapseStore == False

    ## make sure the test runs on Windows and other OS's
    if path[0].isalpha() and path[1] == ':':
        ## a windows file URL looks like this: file:///c:/foo/bar/bat.txt
        expected_url = 'file:///' + path
    else:
        expected_url = 'file://' + path

    assert f1a.externalURL == expected_url, 'unexpected externalURL: ' + f1a.externalURL

    ## a file path that doesn't exist should still work
    f2 = File('/path/to/local/file1.xyz',
              parentId=project.id,
              synapseStore=False)
    f2 = syn.store(f2)
    try:
        syn.get(f2)
        assert False
    except Exception as err:
        assert err.message.startswith("Could not download non-existent file")
    assert f1a.synapseStore == False

    ## Try a URL
    f3 = File('http://dev-versions.synapse.sagebase.org/synapsePythonClient',
              parent=project,
              synapseStore=False)
    f3 = syn.store(f3)
    f3a = syn.get(f3)
    assert f1a.synapseStore == False
Beispiel #33
0
def test_dispose(syn_client, syn_test_helper, new_temp_file):
    project = syn_client.store(Project(name=syn_test_helper.uniq_name()))

    folder = syn_client.store(
        Folder(name=syn_test_helper.uniq_name(prefix='Folder '),
               parent=project))

    file = syn_client.store(
        File(name=syn_test_helper.uniq_name(prefix='File '),
             path=new_temp_file,
             parent=folder))

    syn_objects = [project, folder, file]

    for syn_obj in syn_objects:
        syn_test_helper.dispose_of(syn_obj)
        assert syn_obj in syn_test_helper._trash

    syn_test_helper.dispose()
    assert len(syn_test_helper._trash) == 0

    for syn_obj in syn_objects:
        with pytest.raises(synapseclient.exceptions.SynapseHTTPError) as ex:
            syn_client.get(syn_obj, downloadFile=False)

        err_str = str(ex.value)
        assert "Not Found" in err_str or "cannot be found" in err_str or "is in trash can" in err_str or "does not exist" in err_str

    try:
        os.remove(new_temp_file)
    except:
        pass
def test_synStore_sftpIntegration():
    """Creates a File Entity on an sftp server and add the external url. """
    filepath = utils.make_bogus_binary_file(1 * MB - 777771)
    try:
        file = syn.store(File(filepath, parent=project))
        file2 = syn.get(file)
        assert file.externalURL == file2.externalURL and urlparse(
            file2.externalURL).scheme == 'sftp'

        tmpdir = tempfile.mkdtemp()
        schedule_for_cleanup(tmpdir)

        ## test filename override
        file2.fileNameOverride = "whats_new_in_baltimore.data"
        file2 = syn.store(file2)
        ## TODO We haven't defined how filename override interacts with
        ## TODO previously cached files so, side-step that for now by
        ## TODO making sure the file is not in the cache!
        syn.cache.remove(file2.dataFileHandleId, delete=True)
        file3 = syn.get(file, downloadLocation=tmpdir)
        assert os.path.basename(file3.path) == file2.fileNameOverride

        ## test that we got an MD5 à la SYNPY-185
        assert_is_not_none(file3.md5)
        fh = syn._getFileHandle(file3.dataFileHandleId)
        assert_is_not_none(fh['contentMd5'])
        assert_equals(file3.md5, fh['contentMd5'])
    finally:
        try:
            os.remove(filepath)
        except Exception:
            print(traceback.format_exc())
def test_store__changing_from_Synapse_to_externalURL_by_changing_path(syn, project, schedule_for_cleanup):
    # create a temp file
    temp_path = utils.make_bogus_data_file()
    schedule_for_cleanup(temp_path)

    ext = syn.store(File(temp_path, parent=project, synapseStore=True))
    ext = syn.get(ext)
    assert "org.sagebionetworks.repo.model.file.S3FileHandle" == ext._file_handle.concreteType

    ext.synapseStore = False
    ext = syn.store(ext)

    # do a get to make sure filehandle has been updated correctly
    ext = syn.get(ext.id, downloadFile=True)
    assert "org.sagebionetworks.repo.model.file.ExternalFileHandle" == ext._file_handle.concreteType
    assert utils.as_url(temp_path) == ext.externalURL
    assert not ext.synapseStore

    # swap back to synapse storage
    ext.synapseStore = True
    ext = syn.store(ext)
    # do a get to make sure filehandle has been updated correctly
    ext = syn.get(ext.id, downloadFile=True)
    assert "org.sagebionetworks.repo.model.file.S3FileHandle" == ext._file_handle.concreteType
    assert ext.externalURL is None
    assert ext.synapseStore
Beispiel #36
0
def test_store_file_handle_update_metadata():
    original_file_path = utils.make_bogus_data_file()
    schedule_for_cleanup(original_file_path)

    #upload the project
    entity = syn.store(File(original_file_path, parent=project))
    old_file_handle = entity._file_handle

    #create file handle to replace the old one
    replacement_file_path = utils.make_bogus_data_file()
    schedule_for_cleanup(replacement_file_path)
    new_file_handle = syn.uploadFileHandle(replacement_file_path,
                                           parent=project)

    entity.dataFileHandleId = new_file_handle['id']
    new_entity = syn.store(entity)

    #make sure _file_handle info was changed (_file_handle values are all changed at once so just verifying id change is sufficient)
    assert_equal(new_file_handle['id'], new_entity._file_handle['id'])
    assert_not_equal(old_file_handle['id'], new_entity._file_handle['id'])

    #check that local_state was updated
    assert_equal(replacement_file_path, new_entity.path)
    assert_equal(os.path.dirname(replacement_file_path), new_entity.cacheDir)
    assert_equal([os.path.basename(replacement_file_path)], new_entity.files)
Beispiel #37
0
def test_syncFromSynapse__given_file_id(test_state):
    file_path = utils.make_bogus_data_file()
    test_state.schedule_for_cleanup(file_path)
    file = test_state.syn.store(File(file_path, name=str(uuid.uuid4()), parent=test_state.project, synapseStore=False))
    all_files = synapseutils.syncFromSynapse(test_state.syn, file.id)
    assert 1 == len(all_files)
    assert file == all_files[0]
def test_randomly_failing_parts():
    FAILURE_RATE = 1.0/3.0
    fhid = None
    multipart_upload_module.MIN_PART_SIZE = 5*MB
    multipart_upload_module.MAX_RETRIES = 20

    filepath = utils.make_bogus_binary_file(multipart_upload_module.MIN_PART_SIZE*2 + 777771)
    print('Made bogus file: ', filepath)

    normal_put_chunk = None

    def _put_chunk_or_fail_randomly(url, chunk, verbose=False):
        if random.random() < FAILURE_RATE:
            raise IOError("Ooops! Artificial upload failure for testing.")
        else:
            return normal_put_chunk(url, chunk, verbose)

    ## Mock _put_chunk to fail randomly
    normal_put_chunk = multipart_upload_module._put_chunk
    multipart_upload_module._put_chunk = _put_chunk_or_fail_randomly

    try:
        fhid = multipart_upload(syn, filepath)
        print('FileHandle: {fhid}'.format(fhid=fhid))

        # Download the file and compare it with the original
        junk = File(filepath, parent=project, dataFileHandleId=fhid)
        junk.properties.update(syn._createEntity(junk.properties))
        (tmp_f, tmp_path) = tempfile.mkstemp()
        schedule_for_cleanup(tmp_path)
        junk.update(syn._downloadFileEntity(junk, tmp_path))
        assert filecmp.cmp(filepath, junk.path)

    finally:
        ## Un-mock _put_chunk
        if normal_put_chunk:
            multipart_upload_module._put_chunk = normal_put_chunk

        try:
            if 'junk' in locals():
                syn.delete(junk)
        except Exception:
            print(traceback.format_exc())
        try:
            os.remove(filepath)
        except Exception:
            print(traceback.format_exc())
def test_ExternalFileHandle():
    # Tests shouldn't have external dependencies, but this is a pretty picture of Singapore
    singapore_url = 'http://upload.wikimedia.org/wikipedia/commons/thumb/3/3e/1_singapore_city_skyline_dusk_panorama_2011.jpg/1280px-1_singapore_city_skyline_dusk_panorama_2011.jpg'
    singapore = File(singapore_url, parent=project, synapseStore=False)
    singapore = syn.store(singapore)

    # Verify the file handle
    fileHandle = syn._getFileHandle(singapore.dataFileHandleId)
    assert fileHandle['concreteType'] == 'org.sagebionetworks.repo.model.file.ExternalFileHandle'
    assert fileHandle['externalURL']  == singapore_url

    # The download should occur only on the client side
    singapore = syn.get(singapore, downloadFile=True)
    assert singapore.path is not None
    assert singapore.externalURL == singapore_url
    assert os.path.exists(singapore.path)

    # Update external URL
    singapore_2_url = 'https://upload.wikimedia.org/wikipedia/commons/a/a2/Singapore_Panorama_v2.jpg'
    singapore.externalURL = singapore_2_url
    singapore = syn.store(singapore)
    s2 = syn.get(singapore, downloadFile=False)
    assert s2.externalURL == singapore_2_url
def test_download_file_false():
    RENAME_SUFFIX = 'blah'
    
    # Upload a file
    filepath = utils.make_bogus_binary_file()
    schedule_for_cleanup(filepath)
    schedule_for_cleanup(filepath + RENAME_SUFFIX)
    file = File(filepath, name='SYNR 619', parent=project)
    file = syn.store(file)
    
    # Now hide the file from the cache and download with downloadFile=False
    os.rename(filepath, filepath + RENAME_SUFFIX)
    file = syn.get(file.id, downloadFile=False)
    
    # Change something and reupload the file's metadata
    file.name = "Only change the name, not the file"
    reupload = syn.store(file)
    assert reupload.path is None, "Path field should be null: %s" % reupload.path
    
    # This should still get the correct file
    reupload = syn.get(reupload.id)
    assert filecmp.cmp(filepath + RENAME_SUFFIX, reupload.path)
    assert reupload.name == file.name
def test_round_trip():
    fh = None
    filepath = utils.make_bogus_binary_file(6*MB + 777771, verbose=True)
    print 'Made bogus file: ', filepath
    try:
        fh = syn._chunkedUploadFile(filepath, verbose=False)

        print '=' * 60
        print 'FileHandle:'
        syn.printEntity(fh)

        print 'creating project and file'
        project = create_project()
        junk = File(filepath, parent=project, dataFileHandleId=fh['id'])
        junk.properties.update(syn._createEntity(junk.properties))

        print 'downloading file'
        junk.update(syn._downloadFileEntity(junk, filepath))

        print 'comparing files'
        assert filecmp.cmp(filepath, junk.path)

        print 'ok!'

    finally:
        try:
            if 'junk' in locals():
                syn.delete(junk)
        except Exception as ex:
            print ex
        try:
            os.remove(filepath)
        except Exception as ex:
            print ex
        if fh:
            print 'Deleting fileHandle', fh['id']
            syn._deleteFileHandle(fh)
def test_download_file_URL_false():
    # Upload an external file handle
    fileThatExists = 'http://dev-versions.synapse.sagebase.org/synapsePythonClient'
    reupload = File(fileThatExists, synapseStore=False, parent=project)
    reupload = syn.store(reupload)
    reupload = syn.get(reupload, downloadFile=False)
    originalVersion = reupload.versionNumber
    
    # Reupload and check that the URL and version does not get mangled
    reupload = syn.store(reupload, forceVersion=False)
    assert reupload.path == fileThatExists, "Entity should still be pointing at a URL"
    assert originalVersion == reupload.versionNumber

    # Try a URL with an extra slash at the end
    fileThatDoesntExist = 'http://dev-versions.synapse.sagebase.org/synapsePythonClient/'
    reupload.synapseStore = False
    reupload.path = fileThatDoesntExist
    reupload = syn.store(reupload)
    reupload = syn.get(reupload, downloadFile=False)
    originalVersion = reupload.versionNumber
    
    reupload = syn.store(reupload, forceVersion=False)
    assert reupload.path == fileThatDoesntExist, "Entity should still be pointing at a URL"
    assert originalVersion == reupload.versionNumber
def test_Entity():
    # Update the project
    project_name = str(uuid.uuid4())
    project = Project(name=project_name)
    project = syn.store(project)
    schedule_for_cleanup(project)
    project = syn.getEntity(project)
    assert project.name == project_name
    
    # Create and get a Folder
    folder = Folder('Test Folder', parent=project, description='A place to put my junk', foo=1000)
    folder = syn.createEntity(folder)
    folder = syn.getEntity(folder)
    assert folder.name == 'Test Folder'
    assert folder.parentId == project.id
    assert folder.description == 'A place to put my junk'
    assert folder.foo[0] == 1000
    
    # Update and get the Folder
    folder.pi = 3.14159265359
    folder.description = 'The rejects from the other folder'
    folder = syn.store(folder)
    folder = syn.get(folder)
    assert folder.name == 'Test Folder'
    assert folder.parentId == project.id
    assert folder.description == 'The rejects from the other folder'
    assert folder.pi[0] == 3.14159265359

    # Test CRUD on Files, check unicode
    path = utils.make_bogus_data_file()
    schedule_for_cleanup(path)
    a_file = File(path, parent=folder, description=u'Description with funny characters: Déjà vu, ประเทศไทย, 中国',
                  contentType='text/flapdoodle',
                  foo='An arbitrary value',
                  bar=[33,44,55],
                  bday=Datetime(2013,3,15),
                  band=u"Motörhead",
                  lunch=u"すし")
    a_file = syn.store(a_file)
    assert a_file.path == path

    a_file = syn.getEntity(a_file)
    assert a_file.description == u'Description with funny characters: Déjà vu, ประเทศไทย, 中国', u'description= %s' % a_file.description
    assert a_file['foo'][0] == 'An arbitrary value', u'foo= %s' % a_file['foo'][0]
    assert a_file['bar'] == [33,44,55]
    assert a_file['bday'][0] == Datetime(2013,3,15)
    assert a_file.contentType == 'text/flapdoodle', u'contentType= %s' % a_file.contentType
    assert a_file['band'][0] == u"Motörhead", u'band= %s' % a_file['band'][0]
    assert a_file['lunch'][0] == u"すし", u'lunch= %s' % a_file['lunch'][0]
    
    a_file = syn.downloadEntity(a_file)
    assert filecmp.cmp(path, a_file.path)

    assert_raises(ValueError,File,a_file.path,parent=folder,dataFileHandleId=56456)
    b_file = File(name="blah",parent=folder,dataFileHandleId=a_file.dataFileHandleId)
    b_file = syn.store(b_file)

    assert b_file.dataFileHandleId == a_file.dataFileHandleId
    # Update the File
    a_file.path = path
    a_file['foo'] = 'Another arbitrary chunk of text data'
    a_file['new_key'] = 'A newly created value'
    a_file = syn.updateEntity(a_file)
    assert a_file['foo'][0] == 'Another arbitrary chunk of text data'
    assert a_file['bar'] == [33,44,55]
    assert a_file['bday'][0] == Datetime(2013,3,15)
    assert a_file.new_key[0] == 'A newly created value'
    assert a_file.path == path
    assert a_file.versionNumber == 1, "unexpected version number: " +  str(a_file.versionNumber)

    #Test create, store, get Links
    link = Link(a_file['id'], 
                targetVersion=a_file.versionNumber,
                parent=project)
    link = syn.store(link)
    assert link['linksTo']['targetId'] == a_file['id']
    assert link['linksTo']['targetVersionNumber'] == a_file.versionNumber
    assert link['linksToClassName'] == a_file['concreteType']
    
    testLink = syn.get(link)
    assert testLink == link

    link = syn.get(link,followLink= True)
    assert link['foo'][0] == 'Another arbitrary chunk of text data'
    assert link['bar'] == [33,44,55]
    assert link['bday'][0] == Datetime(2013,3,15)
    assert link.new_key[0] == 'A newly created value'
    assert utils.equal_paths(link.path, path)
    assert link.versionNumber == 1, "unexpected version number: " +  str(a_file.versionNumber)

    # Upload a new File and verify
    new_path = utils.make_bogus_data_file()
    schedule_for_cleanup(new_path)
    a_file = syn.uploadFile(a_file, new_path)
    a_file = syn.downloadEntity(a_file)
    assert filecmp.cmp(new_path, a_file.path)
    assert a_file.versionNumber == 2

    # Make sure we can still get the older version of file
    old_random_data = syn.get(a_file.id, version=1)
    assert filecmp.cmp(old_random_data.path, path)

    tmpdir = tempfile.mkdtemp()
    schedule_for_cleanup(tmpdir)

    ## test file name override
    a_file.fileNameOverride = "peaches_en_regalia.zoinks"
    syn.store(a_file)
    ## TODO We haven't defined how filename override interacts with
    ## TODO previously cached files so, side-step that for now by
    ## TODO making sure the file is not in the cache!
    syn.cache.remove(a_file.dataFileHandleId, delete=True)
    a_file_retreived = syn.get(a_file, downloadLocation=tmpdir)
    assert os.path.basename(a_file_retreived.path) == a_file.fileNameOverride, os.path.basename(a_file_retreived.path)

    ## test getting the file from the cache with downloadLocation parameter (SYNPY-330)
    a_file_cached = syn.get(a_file.id, downloadLocation=tmpdir)
    assert a_file_cached.path is not None
    assert os.path.basename(a_file_cached.path) == a_file.fileNameOverride, a_file_cached.path

    print("\n\nList of files in project:\n")
    syn._list(project, recursive=True)
Beispiel #44
0
def _copyFile(syn, entity, destinationId, version=None, update=False, setProvenance="traceback"):
    """
    Copies most recent version of a file to a specified synapse ID.

    :param entity:          A synapse ID of a File entity

    :param destinationId:   Synapse ID of a folder/project that the file wants to be copied to

    :param version:         Can specify version of a file. 
                            Default to None

    :param update:          Can choose to update files that have the same name 
                            Default to False
    
    :param setProvenance:   Has three values to set the provenance of the copied entity:
                                traceback: Sets to the source entity
                                existing: Sets to source entity's original provenance (if it exists)
                                None: No provenance is set
    """
    ent = syn.get(entity, downloadFile=False, version=version, followLink=False)
    #CHECK: If File is in the same parent directory (throw an error) (Can choose to update files)
    if not update:
        search = syn.query('select name from entity where parentId =="%s"'%destinationId)
        for i in search['results']:
            if i['entity.name'] == ent.name:
                raise ValueError('An item named "%s" already exists in this location. File could not be copied'%ent.name)
    profile = syn.getUserProfile()
    # get provenance earlier to prevent errors from being called in the end
    # If traceback, set activity to old entity
    if setProvenance == "traceback":
        act = Activity("Copied file", used=ent)
    # if existing, check if provenance exists
    elif setProvenance == "existing":
        try:
            act = syn.getProvenance(ent.id)
        except SynapseHTTPError as e:
            # Should catch the 404
            act = None
    elif setProvenance is None or setProvenance.lower() == 'none':
        act = None
    else:
        raise ValueError('setProvenance must be one of None, existing, or traceback')
    #Grab file handle createdBy annotation to see the user that created fileHandle
    fileHandleList = syn.restGET('/entity/%s/version/%s/filehandles'%(ent.id,ent.versionNumber))
    #NOTE: May not always be the first index (need to filter to make sure not PreviewFileHandle)
    #Loop through to check which dataFileHandles match and return createdBy
    # Look at convenience function
    for fileHandle in fileHandleList['list']:
        if fileHandle['id'] == ent.dataFileHandleId:
            createdBy = fileHandle['createdBy']
            break
    else:
        createdBy = None
    #CHECK: If the user created the file, copy the file by using fileHandleId else hard copy
    if profile.ownerId == createdBy:
        new_ent = File(name=ent.name, parentId=destinationId)
        new_ent.dataFileHandleId = ent.dataFileHandleId
    else:
        #CHECK: If the synapse entity is an external URL, change path and store
        if ent.externalURL is None: #and ent.path == None:
            #####If you have never downloaded the file before, the path is None
            store = True
            #This needs to be here, because if the file has never been downloaded before
            #there wont be a ent.path
            ent = syn.get(entity,downloadFile=store,version=version)
            path = ent.path
        else:
            store = False
            ent = syn.get(entity,downloadFile=store,version=version)
            path = ent.externalURL

        new_ent = File(path, name=ent.name, parentId=destinationId, synapseStore=store)
    #Set annotations here
    new_ent.annotations = ent.annotations
    #Store provenance if act is not None
    if act is not None:
        new_ent = syn.store(new_ent, activity=act)
    else:
        new_ent = syn.store(new_ent)
    #Leave this return statement for test
    return new_ent['id']
                  'tissueTypeAbrv': ['FP', 'STG', 'PHG'],
                  'name' :'AMP-AD_MSBB_MSSM_IlluminaHiSeq2500_mRNA_rawCounts.tsv'},
    'syn2920161':{'parentId' :'syn3157743',  #'normalized.sex_race_age_RIN_PMI_batch_site.corrected.csv'
                  'dataType': 'mRNA',                  
                  'platform': 'IlluminaHiSeq2500',
                  'tissueType':['Frontal Pole', 'Superior Temporal Gyrus','Parahippocampal Gyrus'],
                  'tissueTypeAbrv': ['FP', 'STG', 'PHG'],
                  'name' :'AMP-AD_MSBB_MSSM_IlluminaHiSeq2500_mRNA_normalized-sex-race-age-RIN-PMI-batch-site.corrected.csv'},
    }


for id, v in toMove.items():
    ent = syn.get(id)
    print v['name']
    os.rename(ent.path, v['name'])

    f = File(v['name'], parentId=v['parentId'], name=v['name'][7:-4])
    print f.name
    f.consortium, f.study, f.center, f.disease = consortium, study, center, disease
    f.dataType =  v['dataType']
    f.platfrom = v['platform']
    if 'tissueTypeAbrv' in v:
        f.tissueTypeAbrv = v['tissueTypeAbrv']
        f.tissueType = v['tissueType']
    f.fileType =  fileType
    f.organism =  organism
    f = syn.store(f, used = [id], executed=['https://github.com/Sage-Bionetworks/ampAdScripts/blob/e71bbde262625e6999ea9defd98e10fce8f3c542/Mount-Sinai/migrateMSBBMetaAndRNASeq.py'], 
                  activityName='Data migration')
    

def test_Entity():
    # Update the project
    project_name = str(uuid.uuid4())
    project = Project(name=project_name)
    project = syn.store(project)
    schedule_for_cleanup(project)
    project = syn.getEntity(project)
    assert project.name == project_name
    
    # Create and get a Folder
    folder = Folder('Test Folder', parent=project, description='A place to put my junk', foo=1000)
    folder = syn.createEntity(folder)
    folder = syn.getEntity(folder)
    assert folder.name == 'Test Folder'
    assert folder.parentId == project.id
    assert folder.description == 'A place to put my junk'
    assert folder.foo[0] == 1000
    
    # Update and get the Folder
    folder.pi = 3.14159265359
    folder.description = 'The rejects from the other folder'
    folder = syn.store(folder)
    folder = syn.get(folder)
    assert folder.name == 'Test Folder'
    assert folder.parentId == project.id
    assert folder.description == 'The rejects from the other folder'
    assert folder.pi[0] == 3.14159265359

    # Test CRUD on Files
    path = utils.make_bogus_data_file()
    schedule_for_cleanup(path)
    a_file = File(path, parent=folder, description='Random data for testing',
                  contentType='text/flapdoodle',
                  foo='An arbitrary value', bar=[33,44,55], bday=Datetime(2013,3,15))
    a_file = syn._createFileEntity(a_file)
    assert a_file.path == path
    
    a_file = syn.getEntity(a_file)
    assert a_file['foo'][0] == 'An arbitrary value'
    assert a_file['bar'] == [33,44,55]
    assert a_file['bday'][0] == Datetime(2013,3,15)
    assert a_file.contentType == 'text/flapdoodle'
    
    a_file = syn.downloadEntity(a_file)
    assert filecmp.cmp(path, a_file.path)

    # Update the File
    a_file.path = path
    a_file['foo'] = 'Another arbitrary chunk of text data'
    a_file['new_key'] = 'A newly created value'
    a_file = syn.updateEntity(a_file)
    assert a_file['foo'][0] == 'Another arbitrary chunk of text data'
    assert a_file['bar'] == [33,44,55]
    assert a_file['bday'][0] == Datetime(2013,3,15)
    assert a_file.new_key[0] == 'A newly created value'
    assert a_file.path == path
    assert a_file.versionNumber == 1

    # Upload a new File and verify
    new_path = utils.make_bogus_data_file()
    schedule_for_cleanup(new_path)
    a_file = syn.uploadFile(a_file, new_path)
    a_file = syn.downloadEntity(a_file)
    assert filecmp.cmp(new_path, a_file.path)
    assert a_file.versionNumber == 2

    # Make sure we can still get the older version of file
    old_random_data = syn.get(a_file.id, version=1)
    assert filecmp.cmp(old_random_data.path, path)
PLATFORM_MAP = {'133AB': 'AffymetrixU133AB', 
                'Plus2': 'AffymetrixU133Plus2'}
	
query = 'select id, name from entity where parentId=="%s"' %OLDPARENTID
df = synapseHelpers.query2df(syn.chunkedQuery(query))
for i in range(1,df.shape[0]):
    row =  df.ix[i, :]
    ent = syn.get(row.id)
    fStudy, fTissue, fPlatform, fDatatype,  fRest = ent.name.split('_')
    name = 'AMP-AD_MSBB_MSSM_%s_%s_%s' % (PLATFORM_MAP[fPlatform],   
                                          TISSUEABRMAP[fTissue][0], fRest)
    print name
    os.rename(ent.path, name)

    f = File(name, parentId=NEWPARENTID, name=name[7:])
    f.consortium = 'AMP-AD'
    f.study = 'MSBB'
    f.center = 'MSSM'
    f.dataType =  'mRNA'
    f.disease = 'Alzheimers Disease'
    f.platfrom = PLATFORM_MAP[fPlatform]
    f.tissueTypeAbrv = TISSUEABRMAP[fTissue][1]
    f.tissueType = TISSUEABRMAP[fTissue][0]
    f.dataSubType = 'geneExp'
    f.fileType =  'genomicMatrix'
    f.organism =  'human'
    f = syn.store(f, used = [ent], executed=['https://github.com/Sage-Bionetworks/ampAdScripts/blob/4d7d6b78b1e73058483354a1a18bff7422966a4b/Mount-Sinai/migrateMSBBExpression.py'], activityName='Data migration')
    

                    okay = False
            if okay:

                #-----------------------------------------------------------------                                              
                # Upload the file to the correct path:                                                                          
                #-----------------------------------------------------------------                                              
                path = os.path.join(dirpath, filename)
                stat = os.stat(path)
                if stat.st_size > 0:

                    mtime = stat.st_mtime
                    previous_mtime = previous_uploads.get(path, None)
                    if mtime > previous_mtime:

                        print('Uploading {0}...'.format(path))
                        f = File(path, parent=parents[dirpath], name=filename)

                        #-------------------------------------------------------------                                              
                        # Annotate the file on Synapse:                                                                             
                        #-------------------------------------------------------------                                              
                        for istr2, str2 in enumerate(types):
                            if filename.endswith(str2):
                                f.fileType = type_names[istr2]

                        # Optionally add "syn.store(f, used='http://..)"                                                            
                        # to specify the source location                                                                            
                        syn.store(f)

                        c = conn.cursor()
                        c.execute('INSERT OR REPLACE INTO files (path, mtime) VALUES ("%s", "%s")' % (path, mtime))
                        conn.commit()
def test_Entity():
    # Update the project
    project_name = str(uuid.uuid4())
    project = Project(name=project_name)
    project = syn.store(project)
    schedule_for_cleanup(project)
    project = syn.getEntity(project)
    assert_equals(project.name, project_name)
    
    # Create and get a Folder
    folder = Folder('Test Folder', parent=project, description='A place to put my junk', foo=1000)
    folder = syn.createEntity(folder)
    folder = syn.getEntity(folder)
    assert_equals(folder.name, 'Test Folder')
    assert_equals(folder.parentId, project.id)
    assert_equals(folder.description, 'A place to put my junk')
    assert_equals(folder.foo[0], 1000)
    
    # Update and get the Folder
    folder.pi = 3.14159265359
    folder.description = 'The rejects from the other folder'
    folder = syn.store(folder)
    folder = syn.get(folder)
    assert_equals(folder.name, 'Test Folder')
    assert_equals(folder.parentId, project.id)
    assert_equals(folder.description, 'The rejects from the other folder')
    assert_equals(folder.pi[0], 3.14159265359)

    # Test CRUD on Files, check unicode
    path = utils.make_bogus_data_file()
    schedule_for_cleanup(path)
    a_file = File(path, parent=folder, description=u'Description with funny characters: Déjà vu, ประเทศไทย, 中国',
                  contentType='text/flapdoodle',
                  foo='An arbitrary value',
                  bar=[33, 44, 55],
                  bday=Datetime(2013, 3, 15),
                  band=u"Motörhead",
                  lunch=u"すし")
    a_file = syn.store(a_file)
    assert_equals(a_file.path, path)

    a_file = syn.getEntity(a_file)
    assert_equals(a_file.description,
                  u'Description with funny characters: Déjà vu, ประเทศไทย, 中国', u'description= %s'
                  % a_file.description)
    assert_equals(a_file['foo'][0], 'An arbitrary value', u'foo= %s' % a_file['foo'][0])
    assert_equals(a_file['bar'], [33, 44, 55])
    assert_equals(a_file['bday'][0], Datetime(2013, 3, 15))
    assert_equals(a_file.contentType, 'text/flapdoodle', u'contentType= %s' % a_file.contentType)
    assert_equals(a_file['band'][0], u"Motörhead", u'band= %s' % a_file['band'][0])
    assert_equals(a_file['lunch'][0], u"すし", u'lunch= %s' % a_file['lunch'][0])
    
    a_file = syn.downloadEntity(a_file)
    assert_true(filecmp.cmp(path, a_file.path))

    b_file = File(name="blah", parent=folder, dataFileHandleId=a_file.dataFileHandleId)
    b_file = syn.store(b_file)

    assert_equals(b_file.dataFileHandleId, a_file.dataFileHandleId)
    # Update the File
    a_file.path = path
    a_file['foo'] = 'Another arbitrary chunk of text data'
    a_file['new_key'] = 'A newly created value'
    a_file = syn.updateEntity(a_file)
    assert_equals(a_file['foo'][0], 'Another arbitrary chunk of text data')
    assert_equals(a_file['bar'], [33, 44, 55])
    assert_equals(a_file['bday'][0], Datetime(2013, 3, 15))
    assert_equals(a_file.new_key[0], 'A newly created value')
    assert_equals(a_file.path, path)
    assert_equals(a_file.versionNumber, 1, "unexpected version number: " + str(a_file.versionNumber))

    # Test create, store, get Links
    # If version isn't specified, targetVersionNumber should not be set
    link = Link(a_file['id'], 
                parent=project)
    link = syn.store(link)
    assert_equals(link['linksTo']['targetId'], a_file['id'])
    assert_is_none(link['linksTo'].get('targetVersionNumber'))
    assert_equals(link['linksToClassName'], a_file['concreteType'])

    link = Link(a_file['id'], 
                targetVersion=a_file.versionNumber,
                parent=project)
    link = syn.store(link)
    assert_equals(link['linksTo']['targetId'], a_file['id'])
    assert_equals(link['linksTo']['targetVersionNumber'], a_file.versionNumber)
    assert_equals(link['linksToClassName'], a_file['concreteType'])
    
    testLink = syn.get(link)
    assert_equals(testLink, link)

    link = syn.get(link, followLink=True)
    assert_equals(link['foo'][0], 'Another arbitrary chunk of text data')
    assert_equals(link['bar'], [33, 44, 55])
    assert_equals(link['bday'][0], Datetime(2013, 3, 15))
    assert_equals(link.new_key[0], 'A newly created value')
    assert_true(utils.equal_paths(link.path, path))
    assert_equals(link.versionNumber, 1, "unexpected version number: " + str(a_file.versionNumber))

    newfolder = Folder('Testing Folder', parent=project)
    newfolder = syn.store(newfolder)
    link = Link(newfolder, parent=folder.id)
    link = syn.store(link)
    assert_equals(link['linksTo']['targetId'], newfolder.id)
    assert_equals(link['linksToClassName'], newfolder['concreteType'])
    assert_is_none(link['linksTo'].get('targetVersionNumber'))

    # Upload a new File and verify
    new_path = utils.make_bogus_data_file()
    schedule_for_cleanup(new_path)
    a_file = syn.uploadFile(a_file, new_path)
    a_file = syn.downloadEntity(a_file)
    assert_true(filecmp.cmp(new_path, a_file.path))
    assert_equals(a_file.versionNumber, 2)

    # Make sure we can still get the older version of file
    old_random_data = syn.get(a_file.id, version=1)
    assert_true(filecmp.cmp(old_random_data.path, path))

    tmpdir = tempfile.mkdtemp()
    schedule_for_cleanup(tmpdir)

    # test getting the file from the cache with downloadLocation parameter (SYNPY-330)
    a_file_cached = syn.get(a_file.id, downloadLocation=tmpdir)
    assert_is_not_none(a_file_cached.path)
    assert_equal(os.path.basename(a_file_cached.path), os.path.basename(a_file.path))
def test_Entity():
    # Update the project
    project_name = str(uuid.uuid4())
    project = Project(name=project_name)
    project = syn.store(project)
    schedule_for_cleanup(project)
    project = syn.getEntity(project)
    assert project.name == project_name
    
    # Create and get a Folder
    folder = Folder('Test Folder', parent=project, description='A place to put my junk', foo=1000)
    folder = syn.createEntity(folder)
    folder = syn.getEntity(folder)
    assert folder.name == 'Test Folder'
    assert folder.parentId == project.id
    assert folder.description == 'A place to put my junk'
    assert folder.foo[0] == 1000
    
    # Update and get the Folder
    folder.pi = 3.14159265359
    folder.description = 'The rejects from the other folder'
    folder = syn.store(folder)
    folder = syn.get(folder)
    assert folder.name == 'Test Folder'
    assert folder.parentId == project.id
    assert folder.description == 'The rejects from the other folder'
    assert folder.pi[0] == 3.14159265359

    # Test CRUD on Files, check unicode
    path = utils.make_bogus_data_file()
    schedule_for_cleanup(path)
    a_file = File(path, parent=folder, description=u'Description with funny characters: Déjà vu, ประเทศไทย, 中国',
                  contentType='text/flapdoodle',
                  foo='An arbitrary value',
                  bar=[33,44,55],
                  bday=Datetime(2013,3,15),
                  band=u"Motörhead",
                  lunch=u"すし")
    a_file = syn.store(a_file)
    assert a_file.path == path

    a_file = syn.getEntity(a_file)
    assert a_file.description == u'Description with funny characters: Déjà vu, ประเทศไทย, 中国', u'description= %s' % a_file.description
    assert a_file['foo'][0] == 'An arbitrary value', u'foo= %s' % a_file['foo'][0]
    assert a_file['bar'] == [33,44,55]
    assert a_file['bday'][0] == Datetime(2013,3,15)
    assert a_file.contentType == 'text/flapdoodle', u'contentType= %s' % a_file.contentType
    assert a_file['band'][0] == u"Motörhead", u'band= %s' % a_file['band'][0]
    assert a_file['lunch'][0] == u"すし", u'lunch= %s' % a_file['lunch'][0]
    
    a_file = syn.downloadEntity(a_file)
    assert filecmp.cmp(path, a_file.path)

    # Update the File
    a_file.path = path
    a_file['foo'] = 'Another arbitrary chunk of text data'
    a_file['new_key'] = 'A newly created value'
    a_file = syn.updateEntity(a_file)
    assert a_file['foo'][0] == 'Another arbitrary chunk of text data'
    assert a_file['bar'] == [33,44,55]
    assert a_file['bday'][0] == Datetime(2013,3,15)
    assert a_file.new_key[0] == 'A newly created value'
    assert a_file.path == path
    assert a_file.versionNumber == 1, "unexpected version number: " +  str(a_file.versionNumber)

    # Upload a new File and verify
    new_path = utils.make_bogus_data_file()
    schedule_for_cleanup(new_path)
    a_file = syn.uploadFile(a_file, new_path)
    a_file = syn.downloadEntity(a_file)
    assert filecmp.cmp(new_path, a_file.path)
    assert a_file.versionNumber == 2

    # Make sure we can still get the older version of file
    old_random_data = syn.get(a_file.id, version=1)
    assert filecmp.cmp(old_random_data.path, path)
def test_Entity():
    # Update the project
    project_name = str(uuid.uuid4())
    project = Project(name=project_name)
    project = syn.store(project)
    schedule_for_cleanup(project)
    project = syn.getEntity(project)
    assert project.name == project_name

    # Create and get a Folder
    folder = Folder("Test Folder", parent=project, description="A place to put my junk", foo=1000)
    folder = syn.createEntity(folder)
    folder = syn.getEntity(folder)
    assert folder.name == "Test Folder"
    assert folder.parentId == project.id
    assert folder.description == "A place to put my junk"
    assert folder.foo[0] == 1000

    # Update and get the Folder
    folder.pi = 3.14159265359
    folder.description = "The rejects from the other folder"
    folder = syn.store(folder)
    folder = syn.get(folder)
    assert folder.name == "Test Folder"
    assert folder.parentId == project.id
    assert folder.description == "The rejects from the other folder"
    assert folder.pi[0] == 3.14159265359

    # Test CRUD on Files, check unicode
    path = utils.make_bogus_data_file()
    schedule_for_cleanup(path)
    a_file = File(
        path,
        parent=folder,
        description="Description with funny characters: Déjà vu, ประเทศไทย, 中国",
        contentType="text/flapdoodle",
        foo="An arbitrary value",
        bar=[33, 44, 55],
        bday=Datetime(2013, 3, 15),
        band="Motörhead",
        lunch="すし",
    )
    a_file = syn.store(a_file)
    assert a_file.path == path

    a_file = syn.getEntity(a_file)
    assert a_file.description == "Description with funny characters: Déjà vu, ประเทศไทย, 中国", (
        "description= %s" % a_file.description
    )
    assert a_file["foo"][0] == "An arbitrary value", "foo= %s" % a_file["foo"][0]
    assert a_file["bar"] == [33, 44, 55]
    assert a_file["bday"][0] == Datetime(2013, 3, 15)
    assert a_file.contentType == "text/flapdoodle", "contentType= %s" % a_file.contentType
    assert a_file["band"][0] == "Motörhead", "band= %s" % a_file["band"][0]
    assert a_file["lunch"][0] == "すし", "lunch= %s" % a_file["lunch"][0]

    a_file = syn.downloadEntity(a_file)
    assert filecmp.cmp(path, a_file.path)

    # Update the File
    a_file.path = path
    a_file["foo"] = "Another arbitrary chunk of text data"
    a_file["new_key"] = "A newly created value"
    a_file = syn.updateEntity(a_file)
    assert a_file["foo"][0] == "Another arbitrary chunk of text data"
    assert a_file["bar"] == [33, 44, 55]
    assert a_file["bday"][0] == Datetime(2013, 3, 15)
    assert a_file.new_key[0] == "A newly created value"
    assert a_file.path == path
    assert a_file.versionNumber == 1

    # Upload a new File and verify
    new_path = utils.make_bogus_data_file()
    schedule_for_cleanup(new_path)
    a_file = syn.uploadFile(a_file, new_path)
    a_file = syn.downloadEntity(a_file)
    assert filecmp.cmp(new_path, a_file.path)
    assert a_file.versionNumber == 2

    # Make sure we can still get the older version of file
    old_random_data = syn.get(a_file.id, version=1)
    assert filecmp.cmp(old_random_data.path, path)
with open(input_path + ".json") as handle:
    meta_data = json.loads(handle.read())

DST_FOLDER = 'syn3079564' #test upload folder

#Create Provenance log
provenance = Activity(name=meta_data['activity'],
                      desciption=meta_data['description'],
                      used = meta_data['used']
                      exectuted = meta_data['used']
                )
#prov = syn.store(prov)

name  = of.path.basename(input_path)
#Add metadata to files to be uploaded
f = File(input_path, name = name, parentId=DST_FOLDER)
f.dataType = meta_data['dataType']
f.fileType = meta_data['dataType']
f.variant_workflow = meta_data['workflow']
f.variant_workflow_version = meta_data['workflowVersion']
f.call_type = call_type
f.reference_build = meta_data['referenceBuild']
f.center_name = meta_data['center_name']
f.file_md5 = synapseclient.utils.md5_for_file(input_path)
f.study = 'PCAWG 2.0'
f.submitter_donor_id = meta_data['donor_id']
f.alignment_workflow_name='Workflow_Bundle_BWA (UCSC Implementation)'
f.alignment_workflow_source_url='https://github.com/kellrott/tcga_realign'
f.alignment_workflow_version='2.6.0'

    def test_getWithEntityBundle(self, download_file_mock, get_file_URL_and_metadata_mock):
        # Note: one thing that remains unexplained is why the previous version of
        # this test worked if you had a .cacheMap file of the form:
        # {"/Users/chris/.synapseCache/663/-1337/anonymous": "2014-09-15T22:54:57.000Z",
        #  "/var/folders/ym/p7cr7rrx4z7fw36sxv04pqh00000gq/T/tmpJ4nz8U": "2014-09-15T23:27:25.000Z"}
        # ...but failed if you didn't.

        bundle = {
            'entity': {
                'id': 'syn10101',
                'name': 'anonymous',
                'dataFileHandleId': '-1337',
                'concreteType': 'org.sagebionetworks.repo.model.FileEntity',
                'parentId': 'syn12345'},
            'fileHandles': [{
                'concreteType': 'org.sagebionetworks.repo.model.file.S3FileHandle',
                'fileName': 'anonymous',
                'contentType': 'application/flapdoodle',
                'contentMd5': '1698d26000d60816caab15169efcd23a',
                'id': '-1337'}],
            'annotations': {}}

        fileHandle = bundle['fileHandles'][0]['id']
        cacheDir = syn.cache.get_cache_dir(fileHandle)
        # Make sure the .cacheMap file does not already exist
        cacheMap = os.path.join(cacheDir, '.cacheMap')
        if os.path.exists(cacheMap):
            os.remove(cacheMap)

        def _downloadFileHandle(fileHandleId,  objectId, objectType, path, retries=5):
            # touch file at path
            with open(path, 'a'):
                os.utime(path, None)
            os.path.split(path)
            syn.cache.add(fileHandle, path)
            return path

        def _getFileHandleDownload(fileHandleId,  objectId, objectType='FileHandle'):
            return {'fileHandle': bundle['fileHandles'][0], 'fileHandleId': fileHandleId,
                    'preSignedURL': 'http://example.com'}

        download_file_mock.side_effect = _downloadFileHandle
        get_file_URL_and_metadata_mock.side_effect = _getFileHandleDownload

        # 1. ----------------------------------------------------------------------
        # download file to an alternate location

        temp_dir1 = tempfile.mkdtemp()

        e = syn._getWithEntityBundle(entityBundle=bundle,
                                     downloadLocation=temp_dir1,
                                     ifcollision="overwrite.local")

        assert_equal(e.name, bundle["entity"]["name"])
        assert_equal(e.parentId, bundle["entity"]["parentId"])
        assert_equal(utils.normalize_path(os.path.abspath(os.path.dirname(e.path))), utils.normalize_path(temp_dir1))
        assert_equal(bundle["fileHandles"][0]["fileName"], os.path.basename(e.path))
        assert_equal(utils.normalize_path(os.path.abspath(e.path)),
                     utils.normalize_path(os.path.join(temp_dir1, bundle["fileHandles"][0]["fileName"])))

        # 2. ----------------------------------------------------------------------
        # get without specifying downloadLocation
        e = syn._getWithEntityBundle(entityBundle=bundle, ifcollision="overwrite.local")

        assert_equal(e.name, bundle["entity"]["name"])
        assert_equal(e.parentId, bundle["entity"]["parentId"])
        assert_in(bundle["fileHandles"][0]["fileName"], e.files)

        # 3. ----------------------------------------------------------------------
        # download to another location
        temp_dir2 = tempfile.mkdtemp()
        assert_not_equals(temp_dir2, temp_dir1)
        e = syn._getWithEntityBundle(entityBundle=bundle,
                                     downloadLocation=temp_dir2,
                                     ifcollision="overwrite.local")

        assert_in(bundle["fileHandles"][0]["fileName"], e.files)
        assert_is_not_none(e.path)
        assert_true(utils.equal_paths(os.path.dirname(e.path), temp_dir2))

        # 4. ----------------------------------------------------------------------
        # test preservation of local state
        url = 'http://foo.com/secretstuff.txt'
        # need to create a bundle with externalURL
        externalURLBundle = dict(bundle)
        externalURLBundle['fileHandles'][0]['externalURL'] = url
        e = File(name='anonymous', parentId="syn12345", synapseStore=False, externalURL=url)
        e.local_state({'zap': 'pow'})
        e = syn._getWithEntityBundle(entityBundle=externalURLBundle, entity=e)
        assert_equal(e.local_state()['zap'], 'pow')
        assert_equal(e.synapseStore, False)
        assert_equal(e.externalURL, url)
def _copyFile(syn, entity, destinationId, version=None, updateExisting=False, setProvenance="traceback",
              skipCopyAnnotations=False):
    """
    Copies most recent version of a file to a specified synapse ID.

    :param entity:              A synapse ID of a File entity

    :param destinationId:       Synapse ID of a folder/project that the file wants to be copied to

    :param version:             Can specify version of a file. 
                                Default to None

    :param updateExisting:      Can choose to update files that have the same name 
                                Default to False
    
    :param setProvenance:       Has three values to set the provenance of the copied entity:
                                    traceback: Sets to the source entity
                                    existing: Sets to source entity's original provenance (if it exists)
                                    None: No provenance is set
    :param skipCopyAnnotations: Skips copying the annotations
                                Default is False
    """
    ent = syn.get(entity, downloadFile=False, version=version, followLink=False)
    # CHECK: If File is in the same parent directory (throw an error) (Can choose to update files)
    if not updateExisting:
        existingEntity = syn.findEntityId(ent.name, parent=destinationId)
        if existingEntity is not None:
            raise ValueError('An entity named "%s" already exists in this location. File could not be copied'
                             % ent.name)
    profile = syn.getUserProfile()
    # get provenance earlier to prevent errors from being called in the end
    # If traceback, set activity to old entity
    if setProvenance == "traceback":
        act = Activity("Copied file", used=ent)
    # if existing, check if provenance exists
    elif setProvenance == "existing":
        try:
            act = syn.getProvenance(ent.id)
        except SynapseHTTPError as e:
            if e.response.status_code == 404:
                act = None
            else:
                raise e
    elif setProvenance is None or setProvenance.lower() == 'none':
        act = None
    else:
        raise ValueError('setProvenance must be one of None, existing, or traceback')
    # Grab entity bundle
    bundle = syn._getEntityBundle(ent.id, version=ent.versionNumber, bitFlags=0x800 | 0x1)
    fileHandle = synapseclient.utils.find_data_file_handle(bundle)
    createdBy = fileHandle['createdBy']
    # CHECK: If the user created the file, copy the file by using fileHandleId else copy the fileHandle
    if profile.ownerId == createdBy:
        newdataFileHandleId = ent.dataFileHandleId
    else:
        copiedFileHandle = copyFileHandles(syn, [fileHandle], ["FileEntity"], [bundle['entity']['id']],
                                           [fileHandle['contentType']], [fileHandle['fileName']])
        # Check if failurecodes exist
        copyResult = copiedFileHandle['copyResults'][0]
        if copyResult.get("failureCode") is not None:
            raise ValueError("%s dataFileHandleId: %s" % (copyResult["failureCode"],
                                                          copyResult['originalFileHandleId']))
        newdataFileHandleId = copyResult['newFileHandle']['id']

    new_ent = File(dataFileHandleId=newdataFileHandleId,  name=ent.name, parentId=destinationId)
    # Set annotations here
    if not skipCopyAnnotations:
        new_ent.annotations = ent.annotations
    # Store provenance if act is not None
    if act is not None:
        new_ent = syn.store(new_ent, activity=act)
    else:
        new_ent = syn.store(new_ent)
    # Leave this return statement for test
    return new_ent['id']
def test_getWithEntityBundle(download_file_mock):

    ## Note: one thing that remains unexplained is why the previous version of
    ## this test worked if you had a .cacheMap file of the form:
    ## {"/Users/chris/.synapseCache/663/-1337/anonymous": "2014-09-15T22:54:57.000Z",
    ##  "/var/folders/ym/p7cr7rrx4z7fw36sxv04pqh00000gq/T/tmpJ4nz8U": "2014-09-15T23:27:25.000Z"}
    ## ...but failed if you didn't.

    ## TODO: Uncomment failing asserts after SYNR-790 and SYNR-697 are fixed

    bundle = {
        'entity': {
            'id': 'syn10101',
            'name': 'anonymous',
            'dataFileHandleId': '-1337',
            'concreteType': 'org.sagebionetworks.repo.model.FileEntity',
            'parentId': 'syn12345'},
        'fileHandles': [{
            'concreteType': 'org.sagebionetworks.repo.model.file.S3FileHandle',
            'fileName': 'anonymous',
            'contentType': 'application/flapdoodle',
            'contentMd5': '1698d26000d60816caab15169efcd23a',
            'id': '-1337'}],
        'annotations': {}}

    fileHandle = bundle['fileHandles'][0]['id']
    cacheDir = syn.cache.get_cache_dir(fileHandle)
    print "cacheDir=", cacheDir

    # Make sure the .cacheMap file does not already exist
    cacheMap = os.path.join(cacheDir, '.cacheMap')
    if os.path.exists(cacheMap):
        print "removing cacheMap file: ", cacheMap
        os.remove(cacheMap)

    def _downloadFileEntity(entity, path, submission):
        print "mock downloading file to:", path
        ## touch file at path
        with open(path, 'a'):
            os.utime(path, None)
        dest_dir, filename = os.path.split(path)
        return {"path": path,
                "files": [filename],
                "cacheDir": dest_dir}
    download_file_mock.side_effect = _downloadFileEntity

    # 1. ----------------------------------------------------------------------
    # download file to an alternate location

    temp_dir1 = tempfile.mkdtemp()
    print "temp_dir1=", temp_dir1

    e = syn._getWithEntityBundle(entityBundle=bundle,
                                 downloadLocation=temp_dir1,
                                 ifcollision="overwrite.local")
    print e

    assert e.name == bundle["entity"]["name"]
    assert e.parentId == bundle["entity"]["parentId"]
    assert e.cacheDir == temp_dir1
    assert bundle["fileHandles"][0]["fileName"] in e.files
    assert e.path == os.path.join(temp_dir1, bundle["fileHandles"][0]["fileName"])

    # 2. ----------------------------------------------------------------------
    # get without specifying downloadLocation
    e = syn._getWithEntityBundle(entityBundle=bundle, ifcollision="overwrite.local")

    print e

    assert e.name == bundle["entity"]["name"]
    assert e.parentId == bundle["entity"]["parentId"]
    assert bundle["fileHandles"][0]["fileName"] in e.files

    # 3. ----------------------------------------------------------------------
    # download to another location
    temp_dir2 = tempfile.mkdtemp()
    assert temp_dir2 != temp_dir1
    e = syn._getWithEntityBundle(entityBundle=bundle,
                                 downloadLocation=temp_dir2,
                                 ifcollision="overwrite.local")
    print "temp_dir2=", temp_dir2
    print e

    assert_in(bundle["fileHandles"][0]["fileName"], e.files)
    assert e.path is not None
    assert_equal( os.path.dirname(e.path), temp_dir2 )

    # 4. ----------------------------------------------------------------------
    ## test preservation of local state
    url = 'http://foo.com/secretstuff.txt'
    e = File(name='anonymous', parentId="syn12345", synapseStore=False, externalURL=url)
    e.local_state({'zap':'pow'})
    e = syn._getWithEntityBundle(entityBundle=bundle, entity=e)
    assert e.local_state()['zap'] == 'pow'
    assert e.synapseStore == False
    assert e.externalURL == url
def test_get_and_store():
    """Test synapse.get and synapse.store in Project, Folder and File"""
    ## create project
    project = Project(name=str(uuid.uuid4()), description='A bogus test project')
    project = syn.store(project)
    schedule_for_cleanup(project)

    ## create folder
    folder = Folder('Bad stuff', parent=project, description='The rejects from the other fauxldurr', pi=3)
    folder = syn.store(folder)

    ## get folder
    folder = syn.get(folder.id)
    assert folder.name == 'Bad stuff'
    assert folder.parentId == project.id
    assert folder.description == 'The rejects from the other fauxldurr'
    assert folder.pi[0] == 3

    ## update folder
    folder.pi = 3.14159265359
    folder.description = 'The rejects from the other folder'
    syn.store(folder)

    ## verify that the updates stuck
    folder = syn.get(folder)
    assert folder.name == 'Bad stuff'
    assert folder.parentId == project.id
    assert folder.description == 'The rejects from the other folder'
    assert folder.pi[0] == 3.14159265359

    ## upload a File
    path = utils.make_bogus_data_file()
    schedule_for_cleanup(path)
    random_data = File(path, parent=folder, description='Random data', foo=9844)
    random_data = syn.store(random_data)

    ## make sure file comes back intact
    random_data_2 = syn.downloadEntity(random_data)
    assert filecmp.cmp(path, random_data_2.path)
    assert random_data.foo[0] == 9844

    ## update with a new File
    new_file_path = utils.make_bogus_data_file()
    schedule_for_cleanup(new_file_path)
    random_data.path = new_file_path
    random_data.foo = 1266
    random_data = syn.store(random_data)

    ## should be version 2
    assert random_data.versionNumber == 2

    ## make sure the updates stuck
    random_data_2 = syn.get(random_data)
    assert random_data_2.path is not None
    assert filecmp.cmp(new_file_path, random_data_2.path)
    assert random_data_2.foo[0] == 1266
    assert random_data_2.versionNumber == 2

    ## make sure we can still get the older version of file
    old_random_data = syn.get(random_data.id, version=1)
    assert filecmp.cmp(old_random_data.path, path)