Example #1
0
def test_overflow_coercing():
    """Check coercion of a digit string whose value exceeds the 32-bit range.

    The coerced frame must equal the same frame with ``_c0`` converted by
    ``pd.to_numeric``.
    """
    rows = [{'_c0':'12345678901'}]

    # Frame under test: handed to the coercion helper.
    actual = pd.DataFrame(rows)
    coerce_pandas_df_to_numeric_datetime(actual)

    # Reference frame: built from the same rows, converted explicitly.
    expected = pd.DataFrame(rows)
    expected['_c0'] = pd.to_numeric(expected['_c0'])

    assert_frame_equal(expected, actual)
Example #2
0
def test_df_dict_does_not_throw():
    """Check that coercion does not raise on columns holding dicts and lists.

    The single JSON record below mixes scalars with list-valued ("words")
    and dict-valued ("features", "rawPrediction", "probability") fields.
    The test has no assertion: it passes as long as nothing is raised.
    """
    payload = """
[{
    "id": 580320,
    "name": "COUSIN'S GRILL",
    "results": "Fail",
    "violations": "37. TOILET area.",
    "words": ["37.",
    "toilet",
    "area."],
    "features": {
        "type": 0,
        "size": 262144,
        "indices": [0,
        45,
        97],
        "values": [7.0,
        5.0,
        1.0]
    },
    "rawPrediction": {
        "type": 1,
        "values": [3.640841752791392,
        -3.640841752791392]
    },
    "probability": {
        "type": 1,
        "values": [0.974440185187647,
        0.025559814812352966]
    },
    "prediction": 0.0
}]
"""
    frame = pd.read_json(payload)
    coerce_pandas_df_to_numeric_datetime(frame)
def test_all_null_columns():
    """Check coercion of a frame that contains an all-``None`` column.

    Only ``_c0`` is expected to change (to its ``pd.to_numeric`` form);
    the ``nulla`` column must match the untouched reference frame.
    """
    rows = [
        {'_c0':'12345', 'nulla': None},
        {'_c0':'12345', 'nulla': None},
    ]

    # Reference frame with the numeric column converted explicitly.
    expected = pd.DataFrame(rows).assign(
        _c0=lambda frame: pd.to_numeric(frame['_c0']))

    actual = pd.DataFrame(rows)
    coerce_pandas_df_to_numeric_datetime(actual)

    assert_frame_equal(expected, actual)
Example #4
0
def test_no_coercing():
    """Check that columns which cannot be fully converted are left alone.

    ``date`` mixes u'6/1/13' with u'random' and ``temp_diff`` mixes u'12'
    with u'0adsf', so the coerced frame must equal an untouched frame built
    from the same records.
    """
    first_row = {u'buildingID': 0, u'date': u'6/1/13', u'temp_diff': u'12'}
    second_row = {u'buildingID': 1, u'date': u'random', u'temp_diff': u'0adsf'}
    rows = [first_row, second_row]

    # The reference frame is deliberately not modified after construction.
    expected = pd.DataFrame(rows)

    actual = pd.DataFrame(rows)
    coerce_pandas_df_to_numeric_datetime(actual)

    assert_frame_equal(expected, actual)
Example #5
0
def test_numeric_coercing_none_values():
    """Check numeric coercion of a column that mixes a digit string and None.

    ``temp_diff`` (u'12' and ``None``) is expected to match its
    ``pd.to_numeric`` form; the other columns must match the reference
    frame unchanged.
    """
    rows = [
        {u'buildingID': 0, u'date': u'6/1/13', u'temp_diff': u'12'},
        {u'buildingID': 1, u'date': u'asdf', u'temp_diff': None},
    ]

    actual = pd.DataFrame(rows)
    coerce_pandas_df_to_numeric_datetime(actual)

    # Reference frame: same records with temp_diff converted explicitly.
    expected = pd.DataFrame(rows)
    numeric_temp_diff = pd.to_numeric(expected["temp_diff"])
    expected["temp_diff"] = numeric_temp_diff

    assert_frame_equal(expected, actual)
Example #6
0
 def _records_to_dataframe(records_text):
     """Build a DataFrame from newline-separated JSON records.

     Each line of ``records_text`` is parsed with ``json.loads``; the parsed
     records become the rows of a ``pd.DataFrame``, which is then passed to
     ``coerce_pandas_df_to_numeric_datetime`` (its return value is not used)
     and returned.

     Raises:
         DataFrameParseException: if a ``ValueError`` is raised while
             parsing the lines, building the frame or coercing it.
     """
     # An empty payload means "no records": ''.split('\n') would yield ['']
     # and that empty string is not valid JSON.
     lines = [] if records_text == '' else records_text.split('\n')
     try:
         parsed_records = list(map(json.loads, lines))
         df = pd.DataFrame(parsed_records)
         coerce_pandas_df_to_numeric_datetime(df)
     except ValueError:
         message = u"Cannot parse object as JSON: '{}'".format(lines)
         raise DataFrameParseException(message)
     return df
Example #7
0
def test_all_null_columns():
    """Check coercion of a frame whose ``nulla`` column holds only ``None``.

    The coerced frame must equal the same frame with ``_c0`` converted by
    ``pd.to_numeric`` and every other column left as constructed.
    """
    rows = [{'_c0': '12345', 'nulla': None} for _ in range(2)]

    # Frame under test.
    actual = pd.DataFrame(rows)
    coerce_pandas_df_to_numeric_datetime(actual)

    # Reference frame with the numeric column converted explicitly.
    expected = pd.DataFrame(rows)
    converted = pd.to_numeric(expected['_c0'])
    expected['_c0'] = converted

    assert_frame_equal(expected, actual)
Example #8
0
 def _records_to_dataframe(records_text):
     """Build a DataFrame from newline-separated JSON records, keeping key order.

     Each line is decoded into an ``OrderedDict``. When at least one record
     exists, the keys of the first record are passed as the frame's columns.
     The frame is then handed to ``coerce_pandas_df_to_numeric_datetime``
     (its return value is not used) and returned.

     Raises:
         DataFrameParseException: if a ``ValueError`` is raised while
             decoding the lines, building the frame or coercing it.
     """
     # An empty payload means "no records": ''.split('\n') would yield [''].
     lines = [] if records_text == '' else records_text.split('\n')
     try:
         decode = json.JSONDecoder(object_pairs_hook=OrderedDict).decode
         decoded = [decode(line) for line in lines]

         # columns=None is the DataFrame default, so the empty case matches
         # a call that passes no columns at all.
         column_names = decoded[0].keys() if decoded else None
         df = pd.DataFrame(decoded, columns=column_names)

         coerce_pandas_df_to_numeric_datetime(df)
     except ValueError:
         raise DataFrameParseException(u"Cannot parse object as JSON: '{}'".format(lines))
     return df
Example #9
0
    def _records_to_dataframe(records_text, kind):
        """Build a DataFrame from newline-separated JSON records for a session kind.

        Each line is decoded into an ``OrderedDict``. For
        ``constants.SESSION_KIND_SPARKR`` the first decoded element replaces
        the whole list of records (presumably SparkR emits all rows as one
        JSON array on a single line -- confirm against the producer). When
        records exist, the keys of the first one are passed as the frame's
        columns. The frame is then handed to
        ``coerce_pandas_df_to_numeric_datetime`` (its return value is not
        used) and returned.

        Raises:
            DataFrameParseException: if a ``ValueError`` is raised while
                decoding the lines, building the frame or coercing it.
        """
        # An empty payload means "no records": ''.split('\n') would yield [''].
        lines = [] if records_text == '' else records_text.split('\n')
        try:
            decode = json.JSONDecoder(object_pairs_hook=OrderedDict).decode
            records = [decode(line) for line in lines]

            if kind == constants.SESSION_KIND_SPARKR and len(records) > 0:
                records = records[0]

            # Only pin the column order when there is a first record to read
            # the keys from.
            frame_options = {}
            if len(records) > 0:
                frame_options['columns'] = records[0].keys()
            df = pd.DataFrame(records, **frame_options)

            coerce_pandas_df_to_numeric_datetime(df)
        except ValueError:
            raise DataFrameParseException(u"Cannot parse object as JSON: '{}'".format(lines))
        return df