Example #1
def test_tables_csv(syn, project):

    # Define schema
    cols = [
        Column(name='Name', columnType='STRING'),
        Column(name='Born', columnType='INTEGER'),
        Column(name='Hipness', columnType='DOUBLE'),
        Column(name='Living', columnType='BOOLEAN')
    ]

    schema = Schema(name='Jazz Guys', columns=cols, parent=project)

    data = [["John Coltrane", 1926, 8.65, False],
            ["Miles Davis", 1926, 9.87, False],
            ["Bill Evans", 1929, 7.65, False],
            ["Paul Chambers", 1935, 5.14, False],
            ["Jimmy Cobb", 1929, 5.78, True],
            ["Scott LaFaro", 1936, 4.21, False],
            ["Sonny Rollins", 1930, 8.99, True],
            ["Kenny Burrel", 1931, 4.37, True]]

    # The following creates a CSV file and uploads it to create a new table
    table = syn.store(Table(schema, data))

    # Query and download an identical CSV
    results = syn.tableQuery("select * from %s" % table.schema.id,
                             resultsAs="csv",
                             includeRowIdAndRowVersion=False)

    # Test that CSV file came back as expected
    for expected_row, row in zip(data, results):
        assert expected_row == row, "expected %s but got %s" % (expected_row,
                                                                row)
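
The same table can also be built straight from a pandas DataFrame: as_table_columns() infers a Column for each DataFrame column, so the schema does not have to be spelled out by hand. A minimal sketch, assuming a logged-in syn and a project as in the test above:

import pandas as pd
from synapseclient import Schema, Table, as_table_columns

df = pd.DataFrame({'Name': ['John Coltrane', 'Miles Davis'],
                   'Born': [1926, 1926],
                   'Hipness': [8.65, 9.87],
                   'Living': [False, False]})

# as_table_columns() maps dtypes to STRING/INTEGER/DOUBLE/BOOLEAN column types
schema = Schema(name='Jazz Guys (DataFrame)', columns=as_table_columns(df),
                parent=project)
table = syn.store(Table(schema, df))
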
Example #2
def dontruntest_big_tables(syn, project):
    cols = [
        Column(name='name', columnType='STRING', maximumSize=1000),
        Column(name='foo',
               columnType='STRING',
               enumValues=['foo', 'bar', 'bat']),
        Column(name='x', columnType='DOUBLE'),
        Column(name='n', columnType='INTEGER'),
        Column(name='is_bogus', columnType='BOOLEAN')
    ]

    table1 = syn.store(Schema(name='Big Table', columns=cols, parent=project))

    rows_per_append = 10

    for i in range(1000):
        rows = []
        for j in range(rows_per_append):
            foo = cols[1].enumValues[random.randint(0, 2)]
            rows.append(
                Row(('Robot ' + str(i * rows_per_append + j), foo,
                     random.random() * 200.0, random.randint(0, 100),
                     random.random() >= 0.5)))
        syn.store(RowSet(columns=cols, schema=table1, rows=rows))

    syn.tableQuery("select * from %s" % table1.id)

    results = syn.tableQuery(
        "select n, COUNT(n), MIN(x), AVG(x), MAX(x), SUM(x) from %s group by n"
        % table1.id)
    results.asDataFrame()
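
Each syn.store(RowSet(...)) above is its own round trip, so this loop makes 1000 separate appends of 10 rows. For bulk loads it is usually much faster to batch the rows into a single DataFrame or CSV and store that once, as Example #3 does. A hedged sketch of the DataFrame variant, reusing cols and table1 from above (the generated values are illustrative):

import pandas as pd

# one store() for all rows instead of one per 10-row RowSet
df = pd.DataFrame(
    [['Robot %d' % i, 'foo', i * 0.5, i, i % 2 == 0] for i in range(10000)],
    columns=[col.name for col in cols])
syn.store(Table(table1, df))
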
Example #3
def dontruntest_big_csvs(syn, project, schedule_for_cleanup):
    cols = [
        Column(name='name', columnType='STRING', maximumSize=1000),
        Column(name='foo',
               columnType='STRING',
               enumValues=['foo', 'bar', 'bat']),
        Column(name='x', columnType='DOUBLE'),
        Column(name='n', columnType='INTEGER'),
        Column(name='is_bogus', columnType='BOOLEAN')
    ]

    schema1 = syn.store(Schema(name='Big Table', columns=cols, parent=project))

    # write rows to CSV file
    with tempfile.NamedTemporaryFile(delete=False) as temp:
        schedule_for_cleanup(temp.name)
        filename = temp.name

    with io.open(filename, mode='w', encoding="utf-8", newline='') as temp:
        writer = csv.writer(temp,
                            quoting=csv.QUOTE_NONNUMERIC,
                            lineterminator=str(os.linesep))
        writer.writerow([col.name for col in cols])

        for i in range(10):
            for j in range(100):
                foo = cols[1].enumValues[random.randint(0, 2)]
                writer.writerow(
                    ('Robot ' + str(i * 100 + j), foo, random.random() * 200.0,
                     random.randint(0, 100), random.random() >= 0.5))
    # upload the CSV to populate the table
    syn._uploadCsv(filepath=filename, schema=schema1)

    from synapseclient.table import CsvFileTable
    CsvFileTable.from_table_query(syn, "select * from %s" % schema1.id)
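
That trailing CsvFileTable.from_table_query call is the same code path syn.tableQuery takes when resultsAs="csv" (its default), so the more common spelling of the last line is simply:

results = syn.tableQuery("select * from %s" % schema1.id)
df = results.asDataFrame()
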
Example #4
def test_synapse_integer_columns_with_missing_values_from_dataframe(
        syn, project, schedule_for_cleanup):
    # SYNPY-267
    cols = [
        Column(name='x', columnType='STRING'),
        Column(name='y', columnType='INTEGER'),
        Column(name='z', columnType='DOUBLE')
    ]
    schema = syn.store(Schema(name='Big Table', columns=cols, parent=project))

    line_terminator = str(os.linesep)
    # write rows to CSV file
    with tempfile.NamedTemporaryFile(mode="w", suffix=".csv",
                                     delete=False) as temp:
        schedule_for_cleanup(temp.name)
        # 2nd row is missing a value in its integer column
        temp.write('x,y,z' + line_terminator + 'a,1,0.9' + line_terminator +
                   'b,,0.8' + line_terminator + 'c,3,0.7' + line_terminator)
        temp.flush()
        filename = temp.name

    # create a table from csv
    table = Table(schema, filename)
    df = table.asDataFrame()

    table_from_dataframe = Table(schema, df)
    assert table.filepath != table_from_dataframe.filepath
    df2 = table_from_dataframe.asDataFrame()
    assert_frame_equal(df, df2)
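
The wrinkle behind SYNPY-267: pandas has no missing-value representation for int64, so reading a CSV with a blank INTEGER cell promotes the whole column to float64 (1 becomes 1.0, the blank becomes NaN). The test pins down the client-side handling that keeps such promoted columns round-trippable against an INTEGER Synapse column. A quick demonstration of the dtype promotion itself:

import io
import pandas as pd

df = pd.read_csv(io.StringIO('x,y,z\na,1,0.9\nb,,0.8\nc,3,0.7\n'))
print(df['y'].dtype)  # float64, not int64 -- the NaN forces the promotion
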
Example #5
def test_createColumns(syn):
    columns_to_create = [
        Column(name="FirstTestColumn", columnType="INTEGER"),
        Column(name="SecondTestColumn", columnType="DOUBLE")
    ]
    created_columns = syn.createColumns(columns_to_create)
    assert len(columns_to_create) == len(created_columns)
    for col_to_create, created_col in zip(columns_to_create, created_columns):
        assert 'id' in created_col
        assert set(col_to_create.items()).issubset(set(created_col.items()))
Example #6
def test_createColumns():
    columns_to_create = [
        Column(name="FirstTestColumn", columnType="INTEGER"),
        Column(name="SecondTestColumn", columnType="DOUBLE")
    ]
    created_columns = syn.createColumns(columns_to_create)
    assert_equals(len(columns_to_create), len(created_columns))
    for col_to_create, created_col in zip(columns_to_create, created_columns):
        assert_in('id', created_col)
        assert_dict_contains_subset(col_to_create, created_col)
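
Both versions (pytest-style above, nose-style here) exercise the same call: createColumns() stores a batch of Column objects in one request and returns them with server-assigned ids. Those ids can then be used when assembling a Schema; a hedged sketch, assuming Schema accepts column ids in its columns argument:

created = syn.createColumns([
    Column(name="FirstTestColumn", columnType="INTEGER"),
    Column(name="SecondTestColumn", columnType="DOUBLE")
])
schema = syn.store(Schema(name='FromPrecreatedColumns',
                          columns=[c['id'] for c in created],
                          parent=project))
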
Example #7
def process_new_table(args, syn):
    """
    Function: process_new_table

    Purpose: Create an annotations table with the specified name under the
             specified Synapse parent ID using the specified JSON schema. This
             function is called when the "new_table" option is specified when
             the program is called.

    Arguments: JSON schema file reference
               Synapse parent ID
               Synapse table name
               A Synapse client object
    """

    # Define column names for the synapse table.
    dcc_column_names = [
        Column(name="key", columnType="STRING", maximumSize=100),
        Column(name="description", columnType="STRING", maximumSize=250),
        Column(name="columnType", columnType="STRING", maximumSize=50),
        Column(name="maximumSize", columnType="DOUBLE"),
        Column(name="value", columnType="STRING", maximumSize=250),
        Column(name="valueDescription", columnType="LARGETEXT"),
        Column(name="source", columnType="STRING", maximumSize=250),
        Column(name="module", columnType="STRING", maximumSize=100)
    ]

    syn_table_df = process_schema(args.json_schema_file)

    # Build and populate the Synapse table.
    table_schema = Schema(name=args.synapse_table_name,
                          columns=dcc_column_names,
                          parent=args.parent_synapse_id)
    dcc_table = syn.store(Table(table_schema, syn_table_df))
Example #8
def to_column_objects(leaderboard_columns):
    """
    Turns a list of dictionaries of column configuration information (as
    defined in conf.leaderboard_columns) into a list of Column objects
    """
    column_keys = ['name', 'columnType', 'maximumSize', 'enumValues', 'defaultValue']
    return [Column(**{key: col[key] for key in column_keys if key in col})
            for col in leaderboard_columns]
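
For example, given the kind of configuration this helper expects (a hypothetical conf.leaderboard_columns):

leaderboard_columns = [
    {'name': 'objectId', 'columnType': 'STRING', 'maximumSize': 20},
    {'name': 'score', 'columnType': 'DOUBLE'},
    {'name': 'status', 'columnType': 'STRING', 'enumValues': ['SCORED', 'INVALID']},
]
cols = to_column_objects(leaderboard_columns)  # three Column objects
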
Example #9
def test_table_query(test_state):
    """Test command line ability to do table query."""

    cols = [
        Column(name='name', columnType='STRING', maximumSize=1000),
        Column(name='foo',
               columnType='STRING',
               enumValues=['foo', 'bar', 'bat']),
        Column(name='x', columnType='DOUBLE'),
        Column(name='age', columnType='INTEGER'),
        Column(name='cartoon', columnType='BOOLEAN')
    ]

    project_entity = test_state.project

    schema1 = test_state.syn.store(
        Schema(name=str(uuid.uuid4()), columns=cols, parent=project_entity))
    test_state.schedule_for_cleanup(schema1.id)

    data1 = [['Chris', 'bar', 11.23, 45, False],
             ['Jen', 'bat', 14.56, 40, False],
             ['Jane', 'bat', 17.89, 6, False],
             ['Henry', 'bar', 10.12, 1, False]]

    test_state.syn.store(RowSet(schema=schema1, rows=[Row(r) for r in data1]))

    # Test query
    output = run(test_state,
                 'synapse', '--skip-checks', 'query',
                 'select * from %s' % schema1.id)

    output_rows = output.rstrip("\n").split("\n")

    # Check the length of the output
    assert len(output_rows) == 5, "got %s rows" % (len(output_rows), )

    # Check that headers are correct.
    # Should be column names in schema plus the ROW_ID and ROW_VERSION
    my_headers_set = output_rows[0].split("\t")
    expected_headers_set = ["ROW_ID", "ROW_VERSION"] + [col.name for col in cols]
    assert my_headers_set == expected_headers_set, "%r != %r" % (
        my_headers_set, expected_headers_set)
Example #10
def test_store_table_datetime(syn, project):
    current_datetime = datetime.fromtimestamp(round(time.time(), 3))
    schema = syn.store(
        Schema("testTable", [Column(name="testerino", columnType='DATE')],
               project))
    rowset = RowSet(rows=[Row([current_datetime])], schema=schema)
    syn.store(Table(schema, rowset))

    query_result = syn.tableQuery("select * from %s" % utils.id_of(schema),
                                  resultsAs="rowset")
    assert current_datetime == query_result.rowset['rows'][0]['values'][0]
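
Synapse DATE columns store epoch milliseconds, which is why the test rounds time.time() to three decimal places before building the datetime: any sub-millisecond precision would be lost in the round trip and the equality assertion would fail. The equivalent integer form (an illustration, not part of the test):

epoch_ms = int(current_datetime.timestamp() * 1000)  # what a DATE cell holds
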
Example #11
    def _ensure_syt_view(self):
        """
        Ensure the syt table/view exists for the project.
        """
        try:
            # This will fail if the schema doesn't exist. This is a synapseclient bug.
            self._syt_view = self._synapse_client.get(EntityViewSchema(
                name=self.SYT_VIEW_NAME, parent=self._project),
                                                      downloadFile=False)
        except Exception:
            pass

        if self._syt_view is None:
            evs = EntityViewSchema(name=self.SYT_VIEW_NAME,
                                   parent=self._project,
                                   scopes=[self._project],
                                   properties={'viewTypeMask': 9})

            # Delete the 'type' property so we can set our own viewTypeMask to Files and Folders.
            evs.pop('type')

            # Since we removed 'type' we have to manually populate the base columns.
            evs.addColumn(Column(name='id', columnType='ENTITYID'))
            evs.addColumn(Column(name='parentId', columnType='ENTITYID'))
            evs.addColumn(Column(name='projectId', columnType='ENTITYID'))
            evs.addColumn(Column(name='type', columnType='STRING'))
            evs.addColumn(
                Column(name='name', columnType='STRING', maximumSize=256))

            evs.addColumn(
                Column(name=self.ANNO_CHECKED_OUT_BY_ID, columnType='STRING'))
            evs.addColumn(
                Column(name=self.ANNO_CHECKED_OUT_BY_NAME,
                       columnType='STRING'))
            evs.addColumn(
                Column(name=self.ANNO_CHECKED_OUT_DATE, columnType='DATE'))

            self._syt_view = self._synapse_client.store(evs)
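
The magic number in viewTypeMask is a bitmask over the Synapse entity-view type flags: 9 is 0x01 | 0x08, i.e. Files plus Folders, matching the comment about replacing 'type'. A sketch of the same mask built from the client's enum (assumes EntityViewType exposes FILE and FOLDER members, as Example #15 suggests for FILE):

from synapseclient.table import EntityViewType

mask = EntityViewType.FILE.value | EntityViewType.FOLDER.value
assert mask == 9
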
Example #12
def test_rowset_tables(syn, project):
    cols = [
        Column(name='name', columnType='STRING', maximumSize=1000),
        Column(name='foo',
               columnType='STRING',
               enumValues=['foo', 'bar', 'bat']),
        Column(name='x', columnType='DOUBLE'),
        Column(name='age', columnType='INTEGER'),
        Column(name='cartoon', columnType='BOOLEAN'),
        Column(name='description', columnType='LARGETEXT')
    ]

    schema1 = syn.store(Schema(name='Foo Table', columns=cols, parent=project))

    data1 = [['Chris', 'bar', 11.23, 45, False, 'a'],
             ['Jen', 'bat', 14.56, 40, False, 'b'],
             ['Jane', 'bat', 17.89, 6, False, 'c' * 1002],
             ['Henry', 'bar', 10.12, 1, False, 'd']]
    row_reference_set1 = syn.store(
        RowSet(schema=schema1, rows=[Row(r) for r in data1]))
    assert len(row_reference_set1['rows']) == 4
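
Note the 'description' column: it is LARGETEXT rather than STRING because the third row carries a 1002-character value ('c' * 1002), which would not fit under the usual 1,000-character ceiling for STRING columns. LARGETEXT exists for exactly this case.
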
Example #13
def update_global_scores_table(global_data):
    import challenge_config as config
    from synapseclient import Schema, Column, Table, Row, RowSet, as_table_columns
    # 'principalId', 'name', 'score_lb', 'score_mean', 'score_ub', 'rank'
    cols = [
        Column(name='UserID', columnType='STRING', maximumSize=100),
        Column(name='Name', columnType='STRING', maximumSize=100),
        Column(name='score_lb', columnType='DOUBLE'),
        Column(name='score_mean', columnType='DOUBLE'),
        Column(name='score_ub', columnType='DOUBLE'),
        Column(name='rank', columnType='DOUBLE'),
    ]
    schema = Schema(name='Global Scores',
                    columns=cols,
                    parent=config.CHALLENGE_SYN_ID)

    results = syn.tableQuery("select * from {}".format('syn7237020'))
    if len(results) > 0:
        syn.delete(results.asRowSet())
    table = syn.store(Table(schema, global_data))
    results = syn.tableQuery("select * from {}".format(table.tableId))
    for row in results:
        print(row)
    return
Example #14
def test_command_get_recursive_and_query(test_state):
    """Tests the 'synapse get -r' and 'synapse get -q' functions"""

    project_entity = test_state.project

    # Create Folders in Project
    folder_entity = test_state.syn.store(Folder(name=str(uuid.uuid4()),
                                                parent=project_entity))

    folder_entity2 = test_state.syn.store(Folder(name=str(uuid.uuid4()),
                                                 parent=folder_entity))

    # Create and upload two files in sub-Folder
    uploaded_paths = []
    file_entities = []

    for i in range(2):
        f = utils.make_bogus_data_file()
        uploaded_paths.append(f)
        test_state.schedule_for_cleanup(f)
        file_entity = File(f, parent=folder_entity2)
        file_entity = test_state.syn.store(file_entity)
        file_entities.append(file_entity)
        test_state.schedule_for_cleanup(f)

    # Add a file in the Folder as well
    f = utils.make_bogus_data_file()
    uploaded_paths.append(f)
    test_state.schedule_for_cleanup(f)
    file_entity = File(f, parent=folder_entity)
    file_entity = test_state.syn.store(file_entity)
    file_entities.append(file_entity)

    # get -r uses syncFromSynapse() which uses getChildren(), which is not immediately consistent,
    # but faster than chunked queries.
    time.sleep(2)
    # Test recursive get
    run(test_state,
        'synapse', '--skip-checks', 'get', '-r', folder_entity.id)
    # Verify that we downloaded files:
    new_paths = [os.path.join('.', folder_entity2.name, os.path.basename(f)) for f in uploaded_paths[:-1]]
    new_paths.append(os.path.join('.', os.path.basename(uploaded_paths[-1])))
    test_state.schedule_for_cleanup(folder_entity.name)
    for downloaded, uploaded in zip(new_paths, uploaded_paths):
        assert os.path.exists(downloaded)
        assert filecmp.cmp(downloaded, uploaded)
        test_state.schedule_for_cleanup(downloaded)

    # Test query get using a Table with an entity column
    # This should be replaced when Table File Views are implemented in the client
    cols = [Column(name='id', columnType='ENTITYID')]

    schema1 = test_state.syn.store(Schema(name='Foo Table', columns=cols, parent=project_entity))
    test_state.schedule_for_cleanup(schema1.id)

    data1 = [[x.id] for x in file_entities]

    test_state.syn.store(RowSet(schema=schema1, rows=[Row(r) for r in data1]))

    time.sleep(3)  # get -q queries are eventually consistent
    # Test Table/View query get
    run(test_state,
        'synapse', '--skip-checks', 'get', '-q',
        "select id from %s" % schema1.id)
    # Verify that we downloaded files:
    new_paths = [os.path.join('.', os.path.basename(f)) for f in uploaded_paths[:-1]]
    new_paths.append(os.path.join('.', os.path.basename(uploaded_paths[-1])))
    test_state.schedule_for_cleanup(folder_entity.name)
    for downloaded, uploaded in zip(new_paths, uploaded_paths):
        assert os.path.exists(downloaded)
        assert filecmp.cmp(downloaded, uploaded)
        test_state.schedule_for_cleanup(downloaded)

    test_state.schedule_for_cleanup(new_paths[0])
Example #15
def test_create_and_update_file_view(syn, project, schedule_for_cleanup):

    # Create a folder
    folder = Folder(str(uuid.uuid4()),
                    parent=project,
                    description='creating a file-view')
    folder = syn.store(folder)

    # Create dummy file with annotations in our folder
    path = utils.make_bogus_data_file()
    file_annotations = dict(fileFormat='jpg',
                            dataType='image',
                            artist='Banksy',
                            medium='print',
                            title='Girl With Balloon')
    schedule_for_cleanup(path)
    a_file = File(path, parent=folder, annotations=file_annotations)
    a_file = syn.store(a_file)
    schedule_for_cleanup(a_file)

    # Add new columns for the annotations on this file and get their IDs
    my_added_cols = [
        syn.store(Column(name=k, columnType="STRING"))
        for k in file_annotations.keys()
    ]
    my_added_cols_ids = [c['id'] for c in my_added_cols]
    view_default_ids = [
        c['id'] for c in syn._get_default_view_columns(
            "entityview", EntityViewType.FILE.value)
    ]
    col_ids = my_added_cols_ids + view_default_ids
    scopeIds = [folder['id'].lstrip('syn')]

    # Create an empty entity-view with defined scope as folder

    entity_view = EntityViewSchema(name=str(uuid.uuid4()),
                                   scopeIds=scopeIds,
                                   addDefaultViewColumns=True,
                                   addAnnotationColumns=False,
                                   type='file',
                                   columns=my_added_cols,
                                   parent=project)

    entity_view = syn.store(entity_view)
    schedule_for_cleanup(entity_view)

    assert set(scopeIds) == set(entity_view.scopeIds)
    assert set(col_ids) == set(entity_view.columnIds)
    assert EntityViewType.FILE.value == entity_view.viewTypeMask

    # get the current view-schema
    view = syn.tableQuery("select * from %s" % entity_view.id)
    schedule_for_cleanup(view.filepath)

    view_dict = list(
        csv.DictReader(io.open(view.filepath, encoding="utf-8", newline='')))

    # check that all of the annotations were retrieved from the view
    assert set(file_annotations.keys()).issubset(set(view_dict[0].keys()))

    updated_a_file = syn.get(a_file.id, downloadFile=False)

    # Check that the values are the same as what was set
    # Both in the view and on the entity itself
    for k, v in file_annotations.items():
        assert view_dict[0][k] == v
        assert updated_a_file.annotations[k][0] == v

    # Make a change to the view and store
    view_dict[0]['fileFormat'] = 'PNG'

    with tempfile.NamedTemporaryFile(suffix=".csv", delete=False) as temp:
        schedule_for_cleanup(temp.name)
        temp_filename = temp.name

    with io.open(temp_filename, mode='w', encoding="utf-8",
                 newline='') as temp_file:
        dw = csv.DictWriter(temp_file,
                            fieldnames=view_dict[0].keys(),
                            quoting=csv.QUOTE_NONNUMERIC,
                            lineterminator=str(os.linesep))
        dw.writeheader()
        dw.writerows(view_dict)
        temp_file.flush()
    syn.store(Table(entity_view.id, temp_filename))
    new_view_dict = list(
        csv.DictReader(io.open(temp_filename, encoding="utf-8", newline='')))
    assert new_view_dict[0]['fileFormat'] == 'PNG'

    # query for the change
    start_time = time.time()

    new_view_results = syn.tableQuery("select * from %s" % entity_view.id)
    schedule_for_cleanup(new_view_results.filepath)
    new_view_dict = list(
        csv.DictReader(
            io.open(new_view_results.filepath, encoding="utf-8", newline='')))
    # query until change is seen.
    while new_view_dict[0]['fileFormat'] != 'PNG':
        # check timeout
        assert time.time() - start_time < QUERY_TIMEOUT_SEC
        # query again
        new_view_results = syn.tableQuery("select * from %s" % entity_view.id)
        new_view_dict = list(
            csv.DictReader(
                io.open(new_view_results.filepath,
                        encoding="utf-8",
                        newline='')))
    # paranoid check
    assert new_view_dict[0]['fileFormat'] == 'PNG'
Example #16
def partial_rowset_test_state(syn, project):
    cols = [
        Column(name='foo', columnType='INTEGER'),
        Column(name='bar', columnType='INTEGER')
    ]
    table_schema = syn.store(
        Schema(name='PartialRowTest' + str(uuid.uuid4()),
               columns=cols,
               parent=project))
    data = [[1, None], [None, 2]]
    syn.store(RowSet(schema=table_schema, rows=[Row(r) for r in data]))

    # set up a file view
    folder = syn.store(
        Folder(name="PartialRowTestFolder" + str(uuid.uuid4()),
               parent=project))
    syn.store(
        File("~/path/doesnt/matter",
             name="f1",
             parent=folder,
             synapseStore=False))
    syn.store(
        File("~/path/doesnt/matter/again",
             name="f2",
             parent=folder,
             synapseStore=False))

    cols = [
        Column(name='foo', columnType='INTEGER'),
        Column(name='bar', columnType='INTEGER')
    ]
    view_schema = syn.store(
        EntityViewSchema(name='PartialRowTestViews' + str(uuid.uuid4()),
                         columns=cols,
                         addDefaultViewColumns=False,
                         parent=project,
                         scopes=[folder]))

    table_changes = [{'foo': 4}, {'bar': 5}]
    view_changes = [{'bar': 6}, {'foo': 7}]

    expected_table_cells = pd.DataFrame({
        'foo': [4.0, float('NaN')],
        'bar': [float('NaN'), 5.0]
    })
    expected_view_cells = pd.DataFrame({
        'foo': [float('NaN'), 7.0],
        'bar': [6.0, float('NaN')]
    })

    class TestState:
        def __init__(self):
            self.syn = syn
            self.project = project
            self.table_schema = table_schema
            self.view_schema = view_schema
            self.table_changes = table_changes
            self.view_changes = view_changes
            self.expected_table_cells = expected_table_cells
            self.expected_view_cells = expected_view_cells

    return TestState()
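
This fixture stages the same two-row layout in a Table and in an EntityViewSchema so that a partial-row update (change one cell, leave the other untouched) can be asserted against the expected NaN patterns above. A hedged sketch of how the staged table_changes would be applied, assuming PartialRowset.from_mapping in synapseclient.table accepts a row-id-to-changes mapping:

from synapseclient.table import PartialRowset

results = syn.tableQuery("select * from %s" % table_schema.id,
                         resultsAs="rowset")
# map each row id to the {column: new_value} dict for that row
mapping = {row['rowId']: change
           for row, change in zip(results.rowset['rows'], table_changes)}
syn.store(PartialRowset.from_mapping(mapping, results))
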
Example #17
def pubmed(args, syn):
    """
    Given a list of grant numbers pulled from a Synapse table column, use the
    PubMed API to build a search query. The query is the union ('OR' logic) of
    all the grant numbers, which pulls down the list of all PubMed publication
    IDs associated with those grants. Each PubMed ID is then scraped for basic
    descriptive information about the publication.

    :param args:
    :param syn:
    :return:
    """
    projectId = args.projectId
    project = syn.get(projectId)

    if args.grantviewId is not None:
        grantviewId = args.grantviewId
    else:
        grantviewId = "syn10142562"

    csbcGrants = csbcGrantList(syn, grantviewId)
    grantIds = getGrantQuery(csbcGrants)
    pubmedIds = getPubMedIds(grantIds)
    csbcView = getCenterIdsView(syn, grantviewId)

    # for utf encoding and debugging
    # finalTable.to_csv("csbc.csv", sep=',', index=False, encoding="utf-8")
    # finalTable = pandas.read_csv("csbc.csv", delimiter=',', encoding="utf-8")
    # os.remove("csbc.csv")

    if args.tableId:
        # update existing schema
        tableId = args.tableId
        schema = syn.get(tableId)

        publicationTable = syn.tableQuery("select * from %s" % tableId)
        currentTable = publicationTable.asDataFrame()

        new_pubmed_ids = list(
            set(pubmedIds) -
            set([i.split("=")[1] for i in list(currentTable.PubMed)]))
        finalTable = getPMIDDF(new_pubmed_ids, csbcGrants, csbcView)

        table = synapseclient.Table(schema, finalTable.values.tolist())
        table = syn.store(table)

    else:
        # create a new schema
        # cols = synapseclient.as_table_columns(finalTable)
        finalTable = getPMIDDF(pubmedIds, csbcGrants, csbcView)

        cols = [
            Column(name='CSBC PSON Center',
                   columnType='ENTITYID',
                   maximumSize=50),
            Column(name='Consortium', columnType='STRING', maximumSize=100),
            Column(name='PubMed', columnType='LINK', maximumSize=100),
            Column(name='Journal', columnType='STRING', maximumSize=100),
            Column(name='Publication Year', columnType='DATE'),
            Column(name='Title', columnType='STRING', maximumSize=500),
            Column(name='Authors', columnType='STRING', maximumSize=990),
            Column(name='Grant', columnType='STRING', maximumSize=50),
            Column(name='Data Location', columnType='LINK', maximumSize=1000),
            Column(name='Synapse Location',
                   columnType='ENTITYID',
                   maximumSize=50),
            Column(name='Keywords', columnType='STRING', maximumSize=250)
        ]

        schema = synapseclient.Schema(name=args.tableName,
                                      columns=cols,
                                      parent=project)
        table = synapseclient.Table(schema, finalTable)
        table = syn.store(table)
Example #18
def test_copy():
    """Tests the copy function"""
    # Create a Project
    project_entity = syn.store(Project(name=str(uuid.uuid4())))
    schedule_for_cleanup(project_entity.id)
    acl = syn.setPermissions(
        project_entity,
        other_user['principalId'],
        accessType=['READ', 'CREATE', 'UPDATE', 'DOWNLOAD'])
    # Create two Folders in Project
    folder_entity = syn.store(
        Folder(name=str(uuid.uuid4()), parent=project_entity))
    second_folder = syn.store(
        Folder(name=str(uuid.uuid4()), parent=project_entity))
    third_folder = syn.store(
        Folder(name=str(uuid.uuid4()), parent=project_entity))
    schedule_for_cleanup(folder_entity.id)
    schedule_for_cleanup(second_folder.id)
    schedule_for_cleanup(third_folder.id)

    # Annotations and provenance
    repo_url = 'https://github.com/Sage-Bionetworks/synapsePythonClient'
    annots = {'test': ['hello_world']}
    prov = Activity(name="test", used=repo_url)
    # Create, upload, and set annotations/provenance on a file in Folder
    filename = utils.make_bogus_data_file()
    schedule_for_cleanup(filename)
    file_entity = syn.store(File(filename, parent=folder_entity))
    externalURL_entity = syn.store(
        File(repo_url, name='rand', parent=folder_entity, synapseStore=False))
    syn.setAnnotations(file_entity, annots)
    syn.setAnnotations(externalURL_entity, annots)
    syn.setProvenance(externalURL_entity.id, prov)
    schedule_for_cleanup(file_entity.id)
    schedule_for_cleanup(externalURL_entity.id)
    # ------------------------------------
    # TEST COPY FILE
    # ------------------------------------
    output = synapseutils.copy(syn,
                               file_entity.id,
                               destinationId=project_entity.id)
    output_URL = synapseutils.copy(syn,
                                   externalURL_entity.id,
                                   destinationId=project_entity.id,
                                   skipCopyAnnotations=True)

    # Verify that our copied files are identical
    copied_ent = syn.get(output[file_entity.id])
    copied_URL_ent = syn.get(output_URL[externalURL_entity.id],
                             downloadFile=False)

    copied_ent_annot = syn.getAnnotations(copied_ent)
    copied_url_annot = syn.getAnnotations(copied_URL_ent)
    copied_prov = syn.getProvenance(copied_ent)
    copied_url_prov = syn.getProvenance(copied_URL_ent)
    schedule_for_cleanup(copied_ent.id)
    schedule_for_cleanup(copied_URL_ent.id)

    # TEST: setProvenance = Traceback
    print("Test: setProvenance = Traceback")
    assert copied_prov['used'][0]['reference']['targetId'] == file_entity.id
    assert copied_url_prov['used'][0]['reference'][
        'targetId'] == externalURL_entity.id

    # TEST: Make sure copied files are the same
    assert copied_ent_annot == annots
    assert copied_ent.dataFileHandleId == file_entity.dataFileHandleId

    # TEST: Make sure copied URLs are the same
    assert copied_url_annot == {}
    assert copied_URL_ent.externalURL == repo_url
    assert copied_URL_ent.name == 'rand'
    assert copied_URL_ent.dataFileHandleId == externalURL_entity.dataFileHandleId

    # TEST: Throw error if file is copied to a folder/project that has a file with the same filename
    assert_raises(ValueError,
                  synapseutils.copy,
                  syn,
                  project_entity.id,
                  destinationId=project_entity.id)
    assert_raises(ValueError,
                  synapseutils.copy,
                  syn,
                  file_entity.id,
                  destinationId=project_entity.id)
    assert_raises(ValueError,
                  synapseutils.copy,
                  syn,
                  file_entity.id,
                  destinationId=third_folder.id,
                  setProvenance="gib")
    assert_raises(ValueError,
                  synapseutils.copy,
                  syn,
                  file_entity.id,
                  destinationId=file_entity.id)

    print("Test: setProvenance = None")
    output = synapseutils.copy(syn,
                               file_entity.id,
                               destinationId=second_folder.id,
                               setProvenance=None)
    assert_raises(SynapseHTTPError, syn.getProvenance, output[file_entity.id])
    schedule_for_cleanup(output[file_entity.id])

    print("Test: setProvenance = Existing")
    output_URL = synapseutils.copy(syn,
                                   externalURL_entity.id,
                                   destinationId=second_folder.id,
                                   setProvenance="existing")
    output_prov = syn.getProvenance(output_URL[externalURL_entity.id])
    schedule_for_cleanup(output_URL[externalURL_entity.id])
    assert output_prov['name'] == prov['name']
    assert output_prov['used'] == prov['used']

    if 'username' not in other_user or 'password' not in other_user:
        sys.stderr.write(
            '\nWarning: no test-authentication configured. skipping testing copy function when trying to copy file made by another user.\n'
        )
        return

    try:
        print(
            "Test: Other user copy should result in different data file handle"
        )
        syn_other = synapseclient.Synapse(skip_checks=True)
        syn_other.login(other_user['username'], other_user['password'])

        output = synapseutils.copy(syn_other,
                                   file_entity.id,
                                   destinationId=third_folder.id)
        new_copied_ent = syn.get(output[file_entity.id])
        new_copied_ent_annot = syn.getAnnotations(new_copied_ent)
        schedule_for_cleanup(new_copied_ent.id)

        copied_URL_ent.externalURL = "https://www.google.com"
        copied_URL_ent = syn.store(copied_URL_ent)
        output = synapseutils.copy(syn_other,
                                   copied_URL_ent.id,
                                   destinationId=third_folder.id,
                                   version=1)
        new_copied_URL = syn.get(output[copied_URL_ent.id], downloadFile=False)
        schedule_for_cleanup(new_copied_URL.id)

        assert new_copied_ent_annot == annots
        assert new_copied_ent.dataFileHandleId != copied_ent.dataFileHandleId
        # Test if copying different versions gets you the correct file
        assert new_copied_URL.versionNumber == 1
        assert new_copied_URL.externalURL == repo_url
        assert new_copied_URL.dataFileHandleId != copied_URL_ent.dataFileHandleId
    finally:
        syn_other.logout()

    # ------------------------------------
    # TEST COPY LINKS
    # ------------------------------------
    print("Test: Copy Links")
    second_file = utils.make_bogus_data_file()
    # schedule_for_cleanup(filename)
    second_file_entity = syn.store(File(second_file, parent=project_entity))
    link_entity = Link(second_file_entity.id, parent=folder_entity.id)
    link_entity = syn.store(link_entity)

    # The function under test uses queries, which are eventually consistent but
    # not immediately after creating the entities.
    start_time = time.time()
    while syn.query("select id from entity where id=='%s'" %
                    link_entity.id).get('totalNumberOfResults') <= 0:
        assert_less(time.time() - start_time, QUERY_TIMEOUT_SEC)
        time.sleep(2)

    copied_link = synapseutils.copy(syn,
                                    link_entity.id,
                                    destinationId=second_folder.id)
    old = syn.get(link_entity.id, followLink=False)
    new = syn.get(copied_link[link_entity.id], followLink=False)
    assert old.linksTo['targetId'] == new.linksTo['targetId']
    assert old.linksTo['targetVersionNumber'] == new.linksTo[
        'targetVersionNumber']

    schedule_for_cleanup(second_file_entity.id)
    schedule_for_cleanup(link_entity.id)
    schedule_for_cleanup(copied_link[link_entity.id])

    time.sleep(3)

    assert_raises(ValueError,
                  synapseutils.copy,
                  syn,
                  link_entity.id,
                  destinationId=second_folder.id)

    # ------------------------------------
    # TEST COPY TABLE
    # ------------------------------------
    second_project = syn.store(Project(name=str(uuid.uuid4())))
    schedule_for_cleanup(second_project.id)
    print("Test: Copy Tables")
    cols = [
        Column(name='n', columnType='DOUBLE', maximumSize=50),
        Column(name='c', columnType='STRING', maximumSize=50),
        Column(name='i', columnType='INTEGER')
    ]
    data = [[2.1, 'foo', 10], [2.2, 'bar', 20], [2.3, 'baz', 30]]

    schema = syn.store(
        Schema(name='Testing', columns=cols, parent=project_entity.id))
    row_reference_set = syn.store(
        RowSet(columns=cols, schema=schema, rows=[Row(r) for r in data]))

    table_map = synapseutils.copy(syn,
                                  schema.id,
                                  destinationId=second_project.id)
    copied_table = syn.tableQuery('select * from %s' % table_map[schema.id])
    rows = copied_table.asRowSet()['rows']
    # TEST: Check if all values are the same
    for i, row in enumerate(rows):
        assert row['values'] == data[i]

    assert_raises(ValueError,
                  synapseutils.copy,
                  syn,
                  schema.id,
                  destinationId=second_project.id)

    schedule_for_cleanup(schema.id)
    schedule_for_cleanup(table_map[schema.id])

    # ------------------------------------
    # TEST COPY FOLDER
    # ------------------------------------
    print("Test: Copy Folder")
    mapping = synapseutils.copy(syn,
                                folder_entity.id,
                                destinationId=second_project.id)
    for i in mapping:
        old = syn.get(i, downloadFile=False)
        new = syn.get(mapping[i], downloadFile=False)
        assert old.name == new.name
        assert old.annotations == new.annotations
        assert old.concreteType == new.concreteType

    assert_raises(ValueError,
                  synapseutils.copy,
                  syn,
                  folder_entity.id,
                  destinationId=second_project.id)
    # TEST: Throw error if excludeTypes isn't in file, link and table or isn't a list
    assert_raises(ValueError,
                  synapseutils.copy,
                  syn,
                  second_folder.id,
                  destinationId=second_project.id,
                  excludeTypes=["foo"])
    assert_raises(ValueError,
                  synapseutils.copy,
                  syn,
                  second_folder.id,
                  destinationId=second_project.id,
                  excludeTypes="file")
    # TEST: excludeType = ["file"], only the folder is created
    second = synapseutils.copy(syn,
                               second_folder.id,
                               destinationId=second_project.id,
                               excludeTypes=["file", "table", "link"])

    copied_folder = syn.get(second[second_folder.id])
    assert copied_folder.name == second_folder.name
    assert len(second) == 1
    # TEST: Make sure error is thrown if foldername already exists
    start_time = time.time()
    while syn.query("select id from entity where id=='%s'" %
                    copied_folder.id).get('totalNumberOfResults') <= 0:
        assert_less(time.time() - start_time, QUERY_TIMEOUT_SEC)
        time.sleep(2)

    assert_raises(ValueError,
                  synapseutils.copy,
                  syn,
                  second_folder.id,
                  destinationId=second_project.id)

    # ------------------------------------
    # TEST COPY PROJECT
    # ------------------------------------
    print("Test: Copy Project")
    third_project = syn.store(Project(name=str(uuid.uuid4())))
    schedule_for_cleanup(third_project.id)

    mapping = synapseutils.copy(syn,
                                project_entity.id,
                                destinationId=third_project.id)
    for i in mapping:
        old = syn.get(i, downloadFile=False)
        new = syn.get(mapping[i], downloadFile=False)
        if not isinstance(old, Project):
            assert old.name == new.name
        assert old.annotations == new.annotations
        assert old.concreteType == new.concreteType

    # TEST: Can't copy project to a folder
    assert_raises(ValueError,
                  synapseutils.copy,
                  syn,
                  project_entity.id,
                  destinationId=second_folder.id)
Example #19
def test_copy():
    """Tests the copy function"""
    # Create a Project
    project_entity = syn.store(Project(name=str(uuid.uuid4())))
    schedule_for_cleanup(project_entity.id)
    # Create two Folders in Project
    folder_entity = syn.store(
        Folder(name=str(uuid.uuid4()), parent=project_entity))
    second_folder = syn.store(
        Folder(name=str(uuid.uuid4()), parent=project_entity))
    third_folder = syn.store(
        Folder(name=str(uuid.uuid4()), parent=project_entity))
    schedule_for_cleanup(folder_entity.id)
    schedule_for_cleanup(second_folder.id)
    schedule_for_cleanup(third_folder.id)

    # Annotations and provenance
    repo_url = 'https://github.com/Sage-Bionetworks/synapsePythonClient'
    annos = {'test': ['hello_world']}
    prov = Activity(name="test", used=repo_url)
    # Create, upload, and set annotations/provenance on a file in Folder
    filename = utils.make_bogus_data_file()
    schedule_for_cleanup(filename)
    file_entity = syn.store(File(filename, parent=folder_entity))
    externalURL_entity = syn.store(
        File(repo_url, name='rand', parent=folder_entity, synapseStore=False))
    syn.setAnnotations(file_entity, annos)
    syn.setAnnotations(externalURL_entity, annos)
    syn.setProvenance(externalURL_entity.id, prov)
    schedule_for_cleanup(file_entity.id)
    schedule_for_cleanup(externalURL_entity.id)
    # ------------------------------------
    # TEST COPY FILE
    # ------------------------------------
    output = synapseutils.copy(syn,
                               file_entity.id,
                               destinationId=project_entity.id)
    output_URL = synapseutils.copy(syn,
                                   externalURL_entity.id,
                                   destinationId=project_entity.id,
                                   skipCopyAnnotations=True)

    # Verify that our copied files are identical
    copied_ent = syn.get(output[file_entity.id])
    copied_URL_ent = syn.get(output_URL[externalURL_entity.id],
                             downloadFile=False)

    copied_ent_annot = syn.getAnnotations(copied_ent)
    copied_url_annot = syn.getAnnotations(copied_URL_ent)
    copied_prov = syn.getProvenance(copied_ent)
    copied_url_prov = syn.getProvenance(copied_URL_ent)
    schedule_for_cleanup(copied_ent.id)
    schedule_for_cleanup(copied_URL_ent.id)

    # TEST: setProvenance = Traceback
    assert_equals(copied_prov['used'][0]['reference']['targetId'],
                  file_entity.id)
    assert_equals(copied_url_prov['used'][0]['reference']['targetId'],
                  externalURL_entity.id)

    # TEST: Make sure copied files are the same
    assert_equals(copied_ent_annot, annos)
    assert_equals(copied_ent.dataFileHandleId, file_entity.dataFileHandleId)

    # TEST: Make sure copied URLs are the same
    assert_equals(copied_url_annot, {})
    assert_equals(copied_URL_ent.externalURL, repo_url)
    assert_equals(copied_URL_ent.name, 'rand')
    assert_equals(copied_URL_ent.dataFileHandleId,
                  externalURL_entity.dataFileHandleId)

    # TEST: Throw error if file is copied to a folder/project that has a file with the same filename
    assert_raises(ValueError,
                  synapseutils.copy,
                  syn,
                  project_entity.id,
                  destinationId=project_entity.id)
    assert_raises(ValueError,
                  synapseutils.copy,
                  syn,
                  file_entity.id,
                  destinationId=project_entity.id)
    assert_raises(ValueError,
                  synapseutils.copy,
                  syn,
                  file_entity.id,
                  destinationId=third_folder.id,
                  setProvenance="gib")
    assert_raises(ValueError,
                  synapseutils.copy,
                  syn,
                  file_entity.id,
                  destinationId=file_entity.id)

    # Test: setProvenance = None
    output = synapseutils.copy(syn,
                               file_entity.id,
                               destinationId=second_folder.id,
                               setProvenance=None)
    assert_raises(SynapseHTTPError, syn.getProvenance, output[file_entity.id])
    schedule_for_cleanup(output[file_entity.id])

    # Test: setProvenance = Existing
    output_URL = synapseutils.copy(syn,
                                   externalURL_entity.id,
                                   destinationId=second_folder.id,
                                   setProvenance="existing")
    output_prov = syn.getProvenance(output_URL[externalURL_entity.id])
    schedule_for_cleanup(output_URL[externalURL_entity.id])
    assert_equals(output_prov['name'], prov['name'])
    assert_equals(output_prov['used'], prov['used'])

    # ------------------------------------
    # TEST COPY LINKS
    # ------------------------------------
    second_file = utils.make_bogus_data_file()
    # schedule_for_cleanup(filename)
    second_file_entity = syn.store(File(second_file, parent=project_entity))
    link_entity = Link(second_file_entity.id, parent=folder_entity.id)
    link_entity = syn.store(link_entity)

    copied_link = synapseutils.copy(syn,
                                    link_entity.id,
                                    destinationId=second_folder.id)
    old = syn.get(link_entity.id, followLink=False)
    new = syn.get(copied_link[link_entity.id], followLink=False)
    assert_equals(old.linksTo['targetId'], new.linksTo['targetId'])

    schedule_for_cleanup(second_file_entity.id)
    schedule_for_cleanup(link_entity.id)
    schedule_for_cleanup(copied_link[link_entity.id])

    time.sleep(3)

    assert_raises(ValueError,
                  synapseutils.copy,
                  syn,
                  link_entity.id,
                  destinationId=second_folder.id)

    # ------------------------------------
    # TEST COPY TABLE
    # ------------------------------------
    second_project = syn.store(Project(name=str(uuid.uuid4())))
    schedule_for_cleanup(second_project.id)
    cols = [
        Column(name='n', columnType='DOUBLE', maximumSize=50),
        Column(name='c', columnType='STRING', maximumSize=50),
        Column(name='i', columnType='INTEGER')
    ]
    data = [[2.1, 'foo', 10], [2.2, 'bar', 20], [2.3, 'baz', 30]]

    schema = syn.store(
        Schema(name='Testing', columns=cols, parent=project_entity.id))
    syn.store(RowSet(schema=schema, rows=[Row(r) for r in data]))

    table_map = synapseutils.copy(syn,
                                  schema.id,
                                  destinationId=second_project.id)
    copied_table = syn.tableQuery('select * from %s' % table_map[schema.id])
    rows = copied_table.asRowSet()['rows']
    # TEST: Check if all values are the same
    for i, row in enumerate(rows):
        assert_equals(row['values'], data[i])

    assert_raises(ValueError,
                  synapseutils.copy,
                  syn,
                  schema.id,
                  destinationId=second_project.id)

    schedule_for_cleanup(schema.id)
    schedule_for_cleanup(table_map[schema.id])

    # ------------------------------------
    # TEST COPY FOLDER
    # ------------------------------------
    mapping = synapseutils.copy(syn,
                                folder_entity.id,
                                destinationId=second_project.id)
    for i in mapping:
        old = syn.get(i, downloadFile=False)
        new = syn.get(mapping[i], downloadFile=False)
        assert_equals(old.name, new.name)
        assert_equals(old.annotations, new.annotations)
        assert_equals(old.concreteType, new.concreteType)

    assert_raises(ValueError,
                  synapseutils.copy,
                  syn,
                  folder_entity.id,
                  destinationId=second_project.id)
    # TEST: Throw error if excludeTypes isn't in file, link and table or isn't a list
    assert_raises(ValueError,
                  synapseutils.copy,
                  syn,
                  second_folder.id,
                  destinationId=second_project.id,
                  excludeTypes=["foo"])
    assert_raises(ValueError,
                  synapseutils.copy,
                  syn,
                  second_folder.id,
                  destinationId=second_project.id,
                  excludeTypes="file")
    # TEST: excludeType = ["file"], only the folder is created
    second = synapseutils.copy(syn,
                               second_folder.id,
                               destinationId=second_project.id,
                               excludeTypes=["file", "table", "link"])

    copied_folder = syn.get(second[second_folder.id])
    assert_equals(copied_folder.name, second_folder.name)
    assert_equals(len(second), 1)
    # TEST: Make sure error is thrown if foldername already exists

    assert_raises(ValueError,
                  synapseutils.copy,
                  syn,
                  second_folder.id,
                  destinationId=second_project.id)

    # ------------------------------------
    # TEST COPY PROJECT
    # ------------------------------------
    third_project = syn.store(Project(name=str(uuid.uuid4())))
    schedule_for_cleanup(third_project.id)

    mapping = synapseutils.copy(syn,
                                project_entity.id,
                                destinationId=third_project.id)
    for i in mapping:
        old = syn.get(i, downloadFile=False)
        new = syn.get(mapping[i], downloadFile=False)
        if not isinstance(old, Project):
            assert_equals(old.name, new.name)
        assert_equals(old.annotations, new.annotations)
        assert_equals(old.concreteType, new.concreteType)

    # TEST: Can't copy project to a folder
    assert_raises(ValueError,
                  synapseutils.copy,
                  syn,
                  project_entity.id,
                  destinationId=second_folder.id)