def test_ftp_download():
    """Test downloading an Entity that points to a file on an FTP server. """

    # Use an external reference because we only need to test FTP download, not upload.
    # This also avoids having to maintain an FTP server just for this test.
    # Make an entity that points to an FTP server file.
    entity = File(parent=project['id'], name='1KB.zip')
    fileHandle = {
        'externalURL': 'ftp://speedtest.tele2.net/1KB.zip',
        'fileName': entity.name,
        'contentType': 'application/zip',
        'contentMd5': '0f343b0931126a20f133d67c2b018a3b',
        'contentSize': 1024,
        'concreteType': 'org.sagebionetworks.repo.model.file.ExternalFileHandle',
    }
    fileHandle = syn.restPOST('/externalFileHandle', json.dumps(fileHandle),
                              syn.fileHandleEndpoint)
    entity.dataFileHandleId = fileHandle['id']
    entity = syn.store(entity)

    # Download the entity and check that its MD5 matches the expected value
    ftp_file = syn.get(entity.id,
                       downloadLocation=os.getcwd(),
                       downloadFile=True)
    assert ftp_file.md5 == utils.md5_for_file(ftp_file.path).hexdigest()
    schedule_for_cleanup(entity)
    os.remove(ftp_file.path)
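
# The external-file-handle setup above could be factored out. The helper below
# is a minimal sketch, not part of the client API: it assumes only the
# `restPOST` call and `fileHandleEndpoint` attribute used in the test above,
# and its name and parameters are hypothetical.
import json

def create_external_file_handle(syn, external_url, file_name, content_type,
                                content_md5=None, content_size=None):
    """Sketch: register an ExternalFileHandle for a URL outside Synapse."""
    file_handle = {
        'concreteType': 'org.sagebionetworks.repo.model.file.ExternalFileHandle',
        'externalURL': external_url,
        'fileName': file_name,
        'contentType': content_type,
    }
    # MD5 and size are optional for external handles; supplying them lets
    # downloads be verified against the expected checksum.
    if content_md5 is not None:
        file_handle['contentMd5'] = content_md5
    if content_size is not None:
        file_handle['contentSize'] = content_size
    return syn.restPOST('/externalFileHandle', json.dumps(file_handle),
                        syn.fileHandleEndpoint)
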
def test_pool_provider_is_used_in__multipart_upload():
    mocked_get_chunk_function = MagicMock(side_effect=[1, 2, 3, 4])
    file_size = 1 * MB
    filepath = make_bogus_binary_file(n=file_size)
    md5 = md5_for_file(filepath).hexdigest()
    status = {'partsState': {}, 'uploadId': {}, 'state': 'COMPLETED'}

    pool = MagicMock()
    with patch.object(syn, "restPOST", return_value=status),\
            patch.object(pool_provider, "get_pool", return_value=pool) as mock_provider:
        _multipart_upload(syn, filepath, "application/octet-stream",
                          mocked_get_chunk_function, md5, file_size)
        mock_provider.assert_called()
        pool.map.assert_called()
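
# For context, the provider pattern exercised above can be sketched as below.
# This is an illustrative stand-in, not the library's actual implementation:
# a provider hands back either a thread pool or a serial shim exposing the
# same `map` interface, so callers such as _multipart_upload never branch on
# concurrency configuration themselves.
from multiprocessing.dummy import Pool as ThreadPool

class SingleThreadPool(object):
    """Sketch: a pool-compatible object that runs work serially."""
    def map(self, func, iterable):
        return [func(item) for item in iterable]

def get_pool_sketch(max_workers=8):
    # Hypothetical policy: fall back to serial execution when a single
    # worker is requested; otherwise use a thread pool.
    if max_workers <= 1:
        return SingleThreadPool()
    return ThreadPool(max_workers)
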
def test_md5_query():
    # Add the same Entity several times
    path = utils.make_bogus_data_file()
    schedule_for_cleanup(path)
    repeated = File(path,
                    parent=project['id'],
                    description='Same data over and over again')

    # Store the same file under several different names
    num = 5
    stored = []
    for i in range(num):
        repeated.name = 'Repeated data %d.dat' % i
        stored.append(syn.store(repeated).id)

    # Retrieve the data via MD5; we expect num results, although it is
    # possible for the MD5 to be non-unique
    results = syn.md5Query(utils.md5_for_file(path).hexdigest())
    assert sorted(stored) == sorted(res['id'] for res in results)
    assert len(results) == num
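
# `utils.md5_for_file` is used throughout these tests. A typical chunked
# implementation looks like this sketch (illustrative only, not necessarily
# the library's code); reading in blocks keeps memory usage flat for large
# files.
import hashlib

def md5_for_file_sketch(path, block_size=2**20):
    md5 = hashlib.md5()
    with open(path, 'rb') as f:
        for block in iter(lambda: f.read(block_size), b''):
            md5.update(block)
    return md5  # callers take .hexdigest(), as the tests here do
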
def test_resume_partial_download():
    original_file = utils.make_bogus_data_file(40000)
    original_md5 = utils.md5_for_file(original_file).hexdigest()

    entity = File(original_file, parent=project['id'])
    entity = syn.store(entity)

    # Stash the original file for comparison later
    shutil.move(original_file, original_file + '.original')
    original_file += '.original'
    schedule_for_cleanup(original_file)

    temp_dir = tempfile.gettempdir()

    url = '%s/entity/%s/file' % (syn.repoEndpoint, entity.id)
    path = syn._download_from_URL(url,
                                  destination=temp_dir,
                                  fileHandleId=entity.dataFileHandleId,
                                  expected_md5=entity.md5)

    # Simulate an incomplete download by putting the
    # complete file back into its temporary location
    tmp_path = utils.temp_download_filename(temp_dir, entity.dataFileHandleId)
    shutil.move(path, tmp_path)

    # ...and truncating it to some fraction of its original size.
    # Open in binary mode since the file holds binary data.
    with open(tmp_path, 'r+b') as f:
        f.truncate(3 * os.path.getsize(original_file) // 7)

    # This should complete the partial download
    path = syn._download_from_URL(url,
                                  destination=temp_dir,
                                  fileHandleId=entity.dataFileHandleId,
                                  expected_md5=entity.md5)

    assert filecmp.cmp(original_file, path), "File comparison failed"
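
# The resume behavior tested above is typically implemented with HTTP Range
# requests: check how many bytes the partial file already holds, then ask the
# server for the remainder. A minimal sketch using `requests` (an assumption;
# the client's real transfer code is more involved):
import os
import requests

def resume_download_sketch(url, tmp_path, chunk_size=2**16):
    offset = os.path.getsize(tmp_path) if os.path.exists(tmp_path) else 0
    headers = {'Range': 'bytes=%d-' % offset} if offset else {}
    response = requests.get(url, headers=headers, stream=True)
    # 206 Partial Content means the server honored the Range header and we
    # can append; 200 means it did not, so start over from the beginning.
    mode = 'ab' if response.status_code == 206 else 'wb'
    with open(tmp_path, mode) as f:
        for chunk in response.iter_content(chunk_size):
            f.write(chunk)
    return tmp_path
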