Example #1
0
def dontruntest_big_csvs(syn, project, schedule_for_cleanup):
    """Build a table schema, write 1000 random rows to a CSV, upload it,
    then read the table back via a CSV query.

    Prefixed ``dontruntest_`` so the test runner skips it by default.
    """
    columns = [
        Column(name='name', columnType='STRING', maximumSize=1000),
        Column(name='foo', columnType='STRING',
               enumValues=['foo', 'bar', 'bat']),
        Column(name='x', columnType='DOUBLE'),
        Column(name='n', columnType='INTEGER'),
        Column(name='is_bogus', columnType='BOOLEAN'),
    ]

    table_schema = syn.store(
        Schema(name='Big Table', columns=columns, parent=project))

    # Reserve a temp file name (closed immediately), then reopen it in text
    # mode so csv.writer gets str rather than bytes.
    with tempfile.NamedTemporaryFile(delete=False) as handle:
        schedule_for_cleanup(handle.name)
        csv_path = handle.name

    with io.open(csv_path, mode='w', encoding="utf-8", newline='') as out:
        csv_writer = csv.writer(out,
                                quoting=csv.QUOTE_NONNUMERIC,
                                lineterminator=str(os.linesep))
        csv_writer.writerow([column.name for column in columns])

        # 10 batches of 100 rows; each row draws randint, random, randint,
        # random in that order, matching the original call sequence.
        for batch in range(10):
            for offset in range(100):
                enum_choice = columns[1].enumValues[random.randint(0, 2)]
                csv_writer.writerow(
                    ('Robot ' + str(batch * 100 + offset),
                     enum_choice,
                     random.random() * 200.0,
                     random.randint(0, 100),
                     random.random() >= 0.5))

    # upload the CSV, then query the table back as a CsvFileTable
    syn._uploadCsv(filepath=csv_path, schema=table_schema)

    from synapseclient.table import CsvFileTable
    CsvFileTable.from_table_query(syn, "select * from %s" % table_schema.id)
def test_insert_dataframe_column_if_not_exist__nonexistent_column():
    """The method should insert the column (with the given data) when it is
    not already present in the DataFrame.

    Uses a plain ``assert`` instead of the deprecated nose ``assert_equals``,
    consistent with the sibling tests in this file.
    """
    df, column_name, data = _insert_dataframe_column_if_not_exist__setup()

    # method under test
    CsvFileTable._insert_dataframe_column_if_not_exist(df, 0, column_name, data)

    # make sure the data was inserted
    assert data == df[column_name].tolist()
def test_insert_dataframe_column_if_not_exist__nonexistent_column():
    """Inserting into a DataFrame that lacks the column should add it."""
    frame, col_name, expected = _insert_dataframe_column_if_not_exist__setup()

    # exercise the method under test
    CsvFileTable._insert_dataframe_column_if_not_exist(frame, 0, col_name,
                                                       expected)

    # the new column must hold exactly the supplied values
    inserted = frame[col_name].tolist()
    assert inserted == expected
 def test_iter_with_no_headers_in_csv(self):
     """Iterating a CSV that has no header row (header=False) and no supplied
     headers must raise ValueError."""
     csv_body = ("1,2,etag1,\"I like trains\"\n"
                 "5,1,etag2,\"weeeeeeeeeeee\"\n")
     fake_file = StringIOContextManager(csv_body)
     with patch.object(io, "open", return_value=fake_file):
         csv_table = CsvFileTable("syn123", "/fake/file/path", header=False)
         row_iter = iter(csv_table)
         assert_raises(ValueError, next, row_iter)
Example #5
0
 def test_iter_with_no_headers_in_csv(self):
     """A header-less CSV with no headers supplied should fail on iteration."""
     raw = ("1,2,etag1,\"I like trains\"\n"
            "5,1,etag2,\"weeeeeeeeeeee\"\n")
     with patch.object(io, "open",
                       return_value=StringIOContextManager(raw)):
         no_header_table = CsvFileTable("syn123", "/fake/file/path",
                                        header=False)
         row_iterator = no_header_table.__iter__()
         assert_raises(ValueError, next, row_iterator)
Example #6
0
 def test_iter_with_no_headers(self):
     """When self.headers is None, iterating the table must raise ValueError."""
     content = ("ROW_ID,ROW_VERSION,ROW_ETAG,col\n"
                "1,2,etag1,\"I like trains\"\n"
                "5,1,etag2,\"weeeeeeeeeeee\"\n")
     with patch.object(io, "open",
                       return_value=StringIOContextManager(content)):
         headerless = CsvFileTable("syn123", "/fake/file/path")
         assert_raises(ValueError, next, iter(headerless))
 def test_iter_with_no_headers(self):
     """Iteration without self.headers should raise ValueError immediately."""
     csv_text = ("ROW_ID,ROW_VERSION,ROW_ETAG,col\n"
                 "1,2,etag1,\"I like trains\"\n"
                 "5,1,etag2,\"weeeeeeeeeeee\"\n")
     stub = StringIOContextManager(csv_text)
     with patch.object(io, "open", return_value=stub):
         table_obj = CsvFileTable("syn123", "/fake/file/path")
         rows = table_obj.__iter__()
         assert_raises(ValueError, next, rows)
 def test_iter_metadata__has_etag(self):
     """iter_row_metadata yields (row_id, version, etag) tuples when the CSV
     includes a ROW_ETAG column."""
     content = ("ROW_ID,ROW_VERSION,ROW_ETAG,asdf\n"
                "1,2,etag1,\"I like trains\"\n"
                "5,1,etag2,\"weeeeeeeeeeee\"\n")
     with patch.object(io, "open",
                       return_value=StringIOContextManager(content)):
         table = CsvFileTable("syn123", "/fake/file/path")
         rows = list(table.iter_row_metadata())
         assert len(rows) == 2
         assert rows[0] == (1, 2, "etag1")
         assert rows[1] == (5, 1, "etag2")
Example #9
0
 def test_iter_metadata__no_etag(self):
     """iter_row_metadata yields (row_id, version, None) when the CSV has no
     ROW_ETAG column.

     Uses plain ``assert`` instead of the deprecated nose ``assert_equals``,
     matching the has_etag sibling test.
     """
     string_io = StringIOContextManager("ROW_ID,ROW_VERSION,asdf\n"
                                        "1,2,\"I like trains\"\n"
                                        "5,1,\"weeeeeeeeeeee\"\n")
     with patch.object(io, "open", return_value=string_io):
         csv_file_table = CsvFileTable("syn123", "/fake/file/path")
         metadata = [x for x in csv_file_table.iter_row_metadata()]
         assert 2 == len(metadata)
         # etag slot is None because the column is absent
         assert (1, 2, None) == metadata[0]
         assert (5, 1, None) == metadata[1]
 def test_iter_metadata__no_etag(self):
     """Without a ROW_ETAG column, the etag slot of each metadata tuple is None."""
     csv_text = ("ROW_ID,ROW_VERSION,asdf\n"
                 "1,2,\"I like trains\"\n"
                 "5,1,\"weeeeeeeeeeee\"\n")
     stub_file = StringIOContextManager(csv_text)
     with patch.object(io, "open", return_value=stub_file):
         table = CsvFileTable("syn123", "/fake/file/path")
         row_meta = list(table.iter_row_metadata())
         assert_equals(2, len(row_meta))
         assert_equals((1, 2, None), row_meta[0])
         assert_equals((5, 1, None), row_meta[1])
def test_insert_dataframe_column_if_not_exist__existing_column_not_matching():
    """The method must raise when the column already exists with different data.

    BUG FIX: the original left the call bare, so a raised exception would
    make this test FAIL even though the comment says one is expected
    (an ``@raises`` decorator was likely lost). Assert the raise explicitly.
    """
    df, column_name, data = _insert_dataframe_column_if_not_exist__setup()

    # add different data to the DataFrame prior to calling our method
    df.insert(0, column_name, ['mercy', 'main', 'btw'])

    # make sure the data is different
    assert data != df[column_name].tolist()

    # method under test should raise on the conflicting column
    # NOTE(review): exact exception type not visible from here — narrow if known
    try:
        CsvFileTable._insert_dataframe_column_if_not_exist(df, 0, column_name,
                                                           data)
    except Exception:
        pass
    else:
        raise AssertionError("expected an exception for a conflicting column")
def test_insert_dataframe_column_if_not_exist__existing_column_matching():
    """When the column already exists with identical data, the method is a
    no-op and the data must be unchanged.

    Uses a plain ``assert`` instead of the deprecated nose ``assert_equals``,
    consistent with the sibling tests in this file.
    """
    df, column_name, data = _insert_dataframe_column_if_not_exist__setup()

    # add the same data to the DataFrame prior to calling our method
    df.insert(0, column_name, data)

    # method under test
    CsvFileTable._insert_dataframe_column_if_not_exist(df, 0, column_name, data)

    # make sure the data has not changed
    assert data == df[column_name].tolist()
def test_insert_dataframe_column_if_not_exist__existing_column_matching():
    """A pre-existing column holding identical data is left untouched."""
    frame, col_name, values = _insert_dataframe_column_if_not_exist__setup()

    # seed the DataFrame with the exact same values beforehand
    frame.insert(0, col_name, values)

    # exercise the method under test
    CsvFileTable._insert_dataframe_column_if_not_exist(frame, 0, col_name,
                                                       values)

    # the column content must be unchanged
    after = frame[col_name].tolist()
    assert after == values
Example #14
0
def test_insert_dataframe_column_if_not_exist__existing_column_not_matching():
    """The method must raise when the column already exists with different data.

    BUG FIX: the original left the call bare, so a raised exception would
    make this test FAIL even though the comment says one is expected
    (an ``@raises`` decorator was likely lost). Assert the raise explicitly.
    """
    df, column_name, data = _insert_dataframe_column_if_not_exist__setup()

    # add different data to the DataFrame prior to calling our method
    df.insert(0, column_name, ['mercy', 'main', 'btw'])

    # make sure the data is different
    assert data != df[column_name].tolist()

    # method under test should raise on the conflicting column
    # NOTE(review): exact exception type not visible from here — narrow if known
    try:
        CsvFileTable._insert_dataframe_column_if_not_exist(df, 0, column_name,
                                                           data)
    except Exception:
        pass
    else:
        raise AssertionError("expected an exception for a conflicting column")
 def test_iter_row_metadata_mismatch_in_headers(self):
     """self.headers advertises row metadata the CSV lacks; iterating must
     raise ValueError."""
     data = ("col1,col2\n"
             "1,2\n"
             "2,1\n")
     inferred_cols = as_table_columns(StringIOContextManager(data))
     meta_cols = [SelectColumn(name="ROW_ID", columnType="STRING"),
                  SelectColumn(name="ROW_VERSION", columnType="STRING")]
     headers = meta_cols + [SelectColumn.from_column(c)
                            for c in inferred_cols]
     with patch.object(io, "open",
                       return_value=StringIOContextManager(data)):
         table = CsvFileTable("syn123", "/fake/file/path", headers=headers)
         assert_raises(ValueError, next, table.__iter__())
 def test_iter_with_mismatch_row_metadata(self):
     """self.headers and the CSV header row disagree about row metadata
     (CSV has ROW_ETAG, headers do not); iterating must raise ValueError."""
     data = ("ROW_ID,ROW_VERSION,ROW_ETAG,col\n"
             "1,2,etag1,\"I like trains\"\n"
             "5,1,etag2,\"weeeeeeeeeeee\"\n")
     inferred = as_table_columns(StringIOContextManager(data))
     headers = ([SelectColumn(name="ROW_ID", columnType="STRING"),
                 SelectColumn(name="ROW_VERSION", columnType="STRING")]
                + [SelectColumn.from_column(c) for c in inferred])
     with patch.object(io, "open",
                       return_value=StringIOContextManager(data)):
         table = CsvFileTable("syn123", "/fake/file/path", headers=headers)
         assert_raises(ValueError, next, iter(table))
Example #17
0
def test_insert_dataframe_column_if_not_exist__nonexistent_column():
    """The method should insert the column (with the given data) when it is
    not already present in the DataFrame. Skipped when pandas is unavailable.
    """
    # BUG FIX: the guard was inverted ("if pandas_found"), which skipped the
    # test exactly when pandas WAS available; the skip message shows the intent.
    if not pandas_found:
        raise SkipTest(
            "pandas could not be found. please let the pandas into your library."
        )

    df, column_name, data = _insert_dataframe_column_if_not_exist__setup()

    # method under test
    CsvFileTable._insert_dataframe_column_if_not_exist(df, 0, column_name,
                                                       data)

    # make sure the data was inserted
    assert_equals(data, df[column_name].tolist())
Example #18
0
 def test_iter_row_metadata_mismatch_in_headers(self):
     """Headers claim ROW_ID/ROW_VERSION metadata, but the CSV carries none;
     iteration must raise ValueError."""
     data = ("col1,col2\n"
             "1,2\n"
             "2,1\n")
     detected = as_table_columns(StringIOContextManager(data))
     headers = ([SelectColumn(name="ROW_ID", columnType="STRING"),
                 SelectColumn(name="ROW_VERSION", columnType="STRING")]
                + [SelectColumn.from_column(col) for col in detected])
     fake_open = StringIOContextManager(data)
     with patch.object(io, "open", return_value=fake_open):
         mismatched = CsvFileTable("syn123", "/fake/file/path",
                                   headers=headers)
         assert_raises(ValueError, next, mismatched.__iter__())
Example #19
0
 def test_iter_with_mismatch_row_metadata(self):
     """CSV header row and self.headers carry mismatched row metadata
     (ROW_ETAG only in the file); iteration must raise ValueError."""
     data = ("ROW_ID,ROW_VERSION,ROW_ETAG,col\n"
             "1,2,etag1,\"I like trains\"\n"
             "5,1,etag2,\"weeeeeeeeeeee\"\n")
     detected_cols = as_table_columns(StringIOContextManager(data))
     meta = [SelectColumn(name="ROW_ID", columnType="STRING"),
             SelectColumn(name="ROW_VERSION", columnType="STRING")]
     headers = meta + [SelectColumn.from_column(col)
                       for col in detected_cols]
     with patch.object(io, "open",
                       return_value=StringIOContextManager(data)):
         bad_table = CsvFileTable("syn123", "/fake/file/path",
                                  headers=headers)
         assert_raises(ValueError, next, iter(bad_table))
Example #20
0
def test_insert_dataframe_column_if_not_exist__existing_column_not_matching():
    """The method must raise when the column exists with conflicting data.
    Skipped when pandas is unavailable.
    """
    # BUG FIX: the guard was inverted ("if pandas_found") — skip when pandas
    # is MISSING, not when it is present; the skip message shows the intent.
    if not pandas_found:
        raise SkipTest(
            "pandas could not be found. please let the pandas into your library."
        )
    df, column_name, data = _insert_dataframe_column_if_not_exist__setup()

    # add different data to the DataFrame prior to calling our method
    df.insert(0, column_name, ['mercy', 'main', 'btw'])

    # make sure the data is different
    assert_not_equals(data, df[column_name].tolist())

    # BUG FIX: the bare call let a raised exception fail this test even though
    # the comment says one is expected; assert that it actually raises.
    # NOTE(review): exact exception type not visible from here — narrow if known
    try:
        CsvFileTable._insert_dataframe_column_if_not_exist(df, 0, column_name,
                                                           data)
    except Exception:
        pass
    else:
        raise AssertionError("expected an exception for a conflicting column")
def dontruntest_big_csvs():
    """Create a big table schema, write 1000 random rows to CSV, upload them,
    then query the table back and print the rows.

    Prefixed ``dontruntest_`` so test collection skips it. Relies on
    module-level ``syn``, ``project`` and ``schedule_for_cleanup``.
    """
    cols = [
        Column(name='name', columnType='STRING', maximumSize=1000),
        Column(name='foo', columnType='STRING',
               enumValues=['foo', 'bar', 'bat']),
        Column(name='x', columnType='DOUBLE'),
        Column(name='n', columnType='INTEGER'),
        Column(name='is_bogus', columnType='BOOLEAN'),
    ]

    schema1 = syn.store(Schema(name='Big Table', columns=cols, parent=project))

    # BUG FIX: Python 2 print statements converted to print() calls.
    print("Created table:", schema1.id)
    print("with columns:", schema1.columnIds)

    # write rows to CSV file
    # BUG FIX: csv.writer was handed a binary NamedTemporaryFile, which fails
    # on Python 3; reserve the temp file name, then reopen in text mode.
    with tempfile.NamedTemporaryFile(delete=False) as temp:
        schedule_for_cleanup(temp.name)
        filename = temp.name

    with io.open(filename, mode='w', encoding="utf-8", newline='') as temp:
        writer = csv.writer(temp,
                            quoting=csv.QUOTE_NONNUMERIC,
                            lineterminator=str(os.linesep))
        writer.writerow([col.name for col in cols])

        for i in range(10):
            for j in range(100):
                foo = cols[1].enumValues[random.randint(0, 2)]
                writer.writerow(
                    ('Robot ' + str(i * 100 + j), foo,
                     random.random() * 200.0,
                     random.randint(0, 100),
                     random.random() >= 0.5))
            print("wrote 100 rows to disk")

    # upload CSV (result unused; the subsequent query verifies the upload)
    syn._uploadCsv(filepath=filename, schema=schema1)

    from synapseclient.table import CsvFileTable
    results = CsvFileTable.from_table_query(syn,
                                            "select * from %s" % schema1.id)
    print("etag:", results.etag)
    print("tableId:", results.tableId)

    for row in results:
        print(row)
Example #22
0
def test_insert_dataframe_column_if_not_exist__existing_column_matching():
    """When the column already exists with identical data, the method is a
    no-op and the data must be unchanged. Skipped when pandas is unavailable.
    """
    # BUG FIX: the guard was inverted ("if pandas_found"), which skipped the
    # test exactly when pandas WAS available; the skip message shows the intent.
    if not pandas_found:
        raise SkipTest(
            "pandas could not be found. please let the pandas into your library."
        )

    df, column_name, data = _insert_dataframe_column_if_not_exist__setup()

    # add the same data to the DataFrame prior to calling our method
    df.insert(0, column_name, data)

    # method under test
    CsvFileTable._insert_dataframe_column_if_not_exist(df, 0, column_name,
                                                       data)

    # make sure the data has not changed
    assert_equals(data, df[column_name].tolist())
 def test_iter_with_table_row_metadata(self):
     """CSV carries ROW_ID/ROW_VERSION but self.headers does not; iteration
     yields only the data cells."""
     data = ("ROW_ID,ROW_VERSION,col\n"
             "1,2,\"I like trains\"\n"
             "5,1,\"weeeeeeeeeeee\"\n")
     inferred = as_table_columns(StringIOContextManager(data))
     headers = [SelectColumn.from_column(c) for c in inferred]
     with patch.object(io, "open",
                       return_value=StringIOContextManager(data)):
         table = CsvFileTable("syn123", "/fake/file/path", headers=headers)
         wanted = [["I like trains"], ["weeeeeeeeeeee"]]
         for want, got in zip(wanted, table):
             assert want == got
 def test_iter_no_row_metadata(self):
     """Neither the CSV nor self.headers carries row metadata; iteration
     yields the plain data rows."""
     data = ("col1,col2\n"
             "1,2\n"
             "2,1\n")
     detected = as_table_columns(StringIOContextManager(data))
     headers = [SelectColumn.from_column(col) for col in detected]
     with patch.object(io, "open",
                       return_value=StringIOContextManager(data)):
         plain_table = CsvFileTable("syn123", "/fake/file/path",
                                    headers=headers)
         for want, got in zip([[1, 2], [2, 1]], plain_table):
             assert want == got
 def test_iter_with_file_view_row_metadata(self):
     """CSV and self.headers carry matching ROW_ID/ROW_VERSION/ROW_ETAG
     metadata; iteration yields metadata plus data cells."""
     data = ("ROW_ID,ROW_VERSION,ROW_ETAG,col\n"
             "1,2,etag1,\"I like trains\"\n"
             "5,1,etag2,\"weeeeeeeeeeee\"\n")
     inferred = as_table_columns(StringIOContextManager(data))
     meta = [SelectColumn(name="ROW_ID", columnType="STRING"),
             SelectColumn(name="ROW_VERSION", columnType="STRING"),
             SelectColumn(name="ROW_ETAG", columnType="STRING")]
     headers = meta + [SelectColumn.from_column(c) for c in inferred]
     with patch.object(io, "open",
                       return_value=StringIOContextManager(data)):
         view_table = CsvFileTable("syn123", "/fake/file/path",
                                   headers=headers)
         wanted = [['1', '2', "etag1", "I like trains"],
                   ['5', '1', "etag2", "weeeeeeeeeeee"]]
         for want, got in zip(wanted, view_table):
             assert want == got
Example #26
0
def dontruntest_big_csvs():
    """Create a big table schema, write 1000 random rows to CSV, upload them,
    then query the table back and print the rows.

    Prefixed ``dontruntest_`` so test collection skips it. Relies on
    module-level ``syn``, ``project`` and ``schedule_for_cleanup``.
    """
    cols = []
    cols.append(Column(name='name', columnType='STRING', maximumSize=1000))
    cols.append(
        Column(name='foo',
               columnType='STRING',
               enumValues=['foo', 'bar', 'bat']))
    cols.append(Column(name='x', columnType='DOUBLE'))
    cols.append(Column(name='n', columnType='INTEGER'))
    cols.append(Column(name='is_bogus', columnType='BOOLEAN'))

    schema1 = syn.store(Schema(name='Big Table', columns=cols, parent=project))

    print("Created table:", schema1.id)
    print("with columns:", schema1.columnIds)

    # write rows to CSV file: reserve a temp file name (closed immediately),
    # then reopen it in text mode so csv.writer gets str, not bytes
    with tempfile.NamedTemporaryFile(delete=False) as temp:
        schedule_for_cleanup(temp.name)
        filename = temp.name

    with io.open(filename, mode='w', encoding="utf-8", newline='') as temp:
        writer = csv.writer(temp,
                            quoting=csv.QUOTE_NONNUMERIC,
                            lineterminator=str(os.linesep))
        writer.writerow([col.name for col in cols])

        for i in range(10):
            for j in range(100):
                foo = cols[1].enumValues[random.randint(0, 2)]
                writer.writerow(
                    ('Robot ' + str(i * 100 + j), foo, random.random() * 200.0,
                     random.randint(0, 100), random.random() >= 0.5))
            print("wrote 100 rows to disk")

    # upload CSV; the result was bound to an unused PascalCase local
    # (UploadToTableResult) — dropped, the query below verifies the upload
    syn._uploadCsv(filepath=temp.name, schema=schema1)

    from synapseclient.table import CsvFileTable
    results = CsvFileTable.from_table_query(syn,
                                            "select * from %s" % schema1.id)
    print("etag:", results.etag)
    print("tableId:", results.tableId)

    for row in results:
        print(row)