Example #1
def test_read_visibility():
    # A fake visibility is first created and written to file
    # Then the file is read and the two visibilities (fake and reread) are compared. 
    from pandas import DataFrame
    from numpy import sqrt
    from numpy.random import random
    from numpy.testing import assert_allclose, assert_equal
    from tempfile import NamedTemporaryFile
    from purify import read_visibility

    N = 10
    noise = random(N)
    expected = DataFrame({
        'u': random(N), 'v': random(N), 'w': [0] * N,
        'noise': (1+1j) / sqrt(2) * noise, 'y': random(N) + 1j * random(N)
    })

    csv = DataFrame({
        'u': expected['u'], 'v': expected['v'],
        'yreal': expected['y'].real, 'yimag': expected['y'].imag,
        'noise': noise
    })

    with NamedTemporaryFile(delete=True) as file:
        file.close()
        csv.to_csv(file.name, header=False, columns=['u', 'v', 'yreal', 'yimag', 'noise'])
        actual = read_visibility(file.name)

        assert_equal(set(actual.keys()), set(expected.keys()))
        for name in expected.keys():
            assert_allclose(actual[name], expected[name],
                            err_msg="Columns %s did not compare" % name)
Example #2
import codecs

import numpy as np
import regex
from matplotlib import pyplot as plt
from pandas import DataFrame
from sklearn.manifold import TSNE  # TSNE assumed to come from scikit-learn


def plot_phonemes(path):
    phoneme_embeddings = dict()
    for line in codecs.open(path,"r"):
        line = line.split(",")
        key= line[0][1:-1]
        emb = line[1:]
        emb[-1] = emb[-1][:-1]
        emb = np.array([float(e) for e in emb])
        phoneme_embeddings[key] = emb
    
    phoneme_embeddings = DataFrame(phoneme_embeddings,columns=phoneme_embeddings.keys())
    print(phoneme_embeddings.columns)
    
    m = TSNE()
    phoneme_embeddings_tsne = m.fit_transform(phoneme_embeddings.transpose())
    print(len(phoneme_embeddings_tsne))
    for p,emb in zip(phoneme_embeddings.columns, phoneme_embeddings_tsne):
        c = "black"
        if regex.search("^[aeiou3E][*]?$", p):
            c = "red"
            plt.annotate(p,(emb[0],emb[1]),color=c)
        if regex.search("^.*w~$", p):
            c = "blue"
            plt.annotate(p,(emb[0],emb[1]),color=c)
        if regex.search("^.*y~$", p):
            c = "yellow"
            plt.annotate(p,(emb[0],emb[1]),color=c)
        if regex.search("^.*h~$", p):
            c = "brown"
            plt.annotate(p,(emb[0],emb[1]),color=c)
        if regex.search("^.*\"$", p):
            c = "green"
            plt.annotate(p,(emb[0],emb[1]),color=c)
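A hedged driver for plot_phonemes; the input path is reused from Example #15 and the output name is an assumption. plt.annotate does not autoscale the axes, so the limits are widened to a plausible t-SNE range first.

from matplotlib import pyplot as plt

plt.figure(figsize=(10, 10))
plt.xlim(-60, 60)   # assumed span; adjust to the actual t-SNE coordinates
plt.ylim(-60, 60)
plot_phonemes("phoneme_embeddings_plm.csv")   # file name borrowed from Example #15
plt.savefig("phonemes_tsne.png", dpi=300)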
Example #3
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt


def dist2D(dist: pd.DataFrame,
           ranges: pd.DataFrame,
           nlevels: int=16,
           nx: int=2,
           size: int=6,
           colorbar: bool=True,
           name: str='dist') -> plt.Figure:
    """
    Plot 2D probability distributions.

    Parameters
    ----------
    dist : Multiindexed dataframe with force field as primary
        index and distributions as created by dist2D().
    ranges : Multiindexed dataframe with force field as primary
        index and edges as created by dist1D().
    nlevels : Number of contour levels to use.
    nx : Number of plots per row.
    size : Relative size of each plot.
    colorbar : If true, will plot a colorbar.
    name : Name of the distribution.

    Returns
    -------
    fig : matplotlib figure.

    """

    # Setup plotting parameters
    nplots = dist.shape[1]
    xsize, ysize = nx, (nplots // nx) + 1
    cmap = plt.get_cmap('viridis')
    fig = plt.figure(figsize=(xsize * size, ysize * size))

    for i, k in enumerate(dist.keys()):

        # Get keys for both CVs
        kx, ky = k.split('.')

        # Prepare plotting grid (np.meshgrid doesn't work)
        X = np.broadcast_to(ranges[kx], dist[k].unstack().shape)
        Y = np.broadcast_to(ranges[ky], dist[k].unstack().shape).T
        Z = dist[k].unstack().values.T

        # Contour levels taking inf into account
        levels = np.linspace(np.amin(Z[~np.isinf(Z)]),
                             np.amax(Z[~np.isinf(Z)]), nlevels)
        ax = fig.add_subplot(ysize, xsize, i + 1)
        cm = ax.contourf(X, Y, Z, cmap=cmap, levels=levels)
        ax.set_xlabel(kx)
        ax.set_ylabel(ky)
        ax.set_title(name)

    if colorbar:
        fig.colorbar(cm)

    return fig
Example #4
def df2boxplots(sc_df: pd.DataFrame) -> None:
    rows = 5
    cols = (len(sc_df.keys()) // 5) + 1
    for i, flt in enumerate(sc_df):
        if flt in ['description', 'SCORE:']:
            continue
        ax = plt.subplot(rows, cols, i+1)
        plt.boxplot(sc_df[flt].tolist())
        plt.title(flt)
    plt.show()
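A hedged usage sketch for df2boxplots: the synthetic score table below stands in for a real Rosetta-style score file, and the column names are assumptions.

import numpy as np
import pandas as pd

scores = pd.DataFrame({'total_score': np.random.randn(50),
                       'fa_atr': np.random.randn(50),
                       'fa_rep': np.random.randn(50)})
df2boxplots(scores)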
Example #5
# plot_languages relies on the same module-level imports as plot_phonemes above
# (codecs, numpy as np, DataFrame, regex, TSNE, matplotlib.pyplot as plt).
def plot_languages(path):
    phoneme_embeddings = dict()
    for line in codecs.open(path,"r"):
        line = line.split(",")
        key= line[0][1:-1]
        emb = line[1:]
        emb[-1] = emb[-1][:-1]
        emb = np.array([float(e) for e in emb])
        phoneme_embeddings[key] = emb
    
    phoneme_embeddings = DataFrame(phoneme_embeddings,columns=phoneme_embeddings.keys())
    print(phoneme_embeddings.columns)
    
    m = TSNE()
    phoneme_embeddings_tsne = m.fit_transform(phoneme_embeddings[["STANDARD_GERMAN","BERNESE_GERMAN","EASTERN_FRISIAN","NORTH_FRISIAN_AMRUM",                                                           
                 "ENGLISH","DUTCH","YIDDISH_EASTERN","YIDDISH_WESTERN","DANISH","SWEDISH","FAROESE","NORWEGIAN_RIKSMAL",
                 "GJESTAL_NORWEGIAN","NORWEGIAN_BOKMAAL","NORWEGIAN_NYNORSK_TOTEN","SANDNES_NORWEGIAN",
                 "ICELANDIC","POLISH","KASHUBIAN","CZECH",
                 "LOWER_SORBIAN","UPPER_SORBIAN","SLOVAK","SLOVENIAN","MACEDONIAN","BULGARIAN","UKRAINIAN",
                 "BELARUSIAN","RUSSIAN","ARABIC_CYPRIOT_SPOKEN","ARABIC_GULF_SPOKEN","ARABIC_LIBYAN_SPOKEN","ARABIC_NORTH_LEVANTINE_SPOKEN",
                 "ARABIC_SUDANESE_SPOKEN","CAIRO_ARABIC","DAMASCUS_ARABIC"]].transpose())
    print(len(phoneme_embeddings_tsne))
    for p,emb in zip(["STANDARD_GERMAN","BERNESE_GERMAN","EASTERN_FRISIAN","NORTH_FRISIAN_AMRUM",                                                         
                 "ENGLISH","DUTCH","YIDDISH_EASTERN","YIDDISH_WESTERN","DANISH","SWEDISH","FAROESE","NORWEGIAN_RIKSMAL",
                 "GJESTAL_NORWEGIAN","NORWEGIAN_BOKMAAL","NORWEGIAN_NYNORSK_TOTEN","SANDNES_NORWEGIAN",
                 "ICELANDIC","POLISH","KASHUBIAN","CZECH",
                 "LOWER_SORBIAN","UPPER_SORBIAN","SLOVAK","SLOVENIAN","MACEDONIAN","BULGARIAN","UKRAINIAN",
                 "BELARUSIAN","RUSSIAN","ARABIC_CYPRIOT_SPOKEN","ARABIC_GULF_SPOKEN","ARABIC_LIBYAN_SPOKEN","ARABIC_NORTH_LEVANTINE_SPOKEN",
                 "ARABIC_SUDANESE_SPOKEN","CAIRO_ARABIC","DAMASCUS_ARABIC"], phoneme_embeddings_tsne):
        c = "black"
    #     if regex.search("[aeiou3E]\\*?", p):
    #         c = "red"
    #     if regex.search(".*w~", p):
    #         c = "blue"
    #     if regex.search(".*y~", p):
    #         c = "yellow"
    #     if regex.search(".*h~", p):
    #         c = "brown"
    #     if regex.search(".*\"", p):
    #         c = "green"
        if p in ["STANDARD_GERMAN","BERNESE_GERMAN","EASTERN_FRISIAN","FRISIAN_WESTERN,","NORTH_FRISIAN_AMRUM"                                                            
                 "ENGLISH","DUTCH","YIDDISH_EASTERN","YIDDISH_WESTERN","DANISH","SWEDISH","FAROESE","NORWEGIAN_RIKSMAL",
                 "GJESTAL_NORWEGIAN","NORWEGIAN_BOKMAAL","NORWEGIAN_NYNORSK_TOTEN","SANDNES_NORWEGIAN","ICELANDIC"]:
            c = "red"
            plt.annotate(p,(emb[0],emb[1]),color=c)
    
        if p in ["POLISH","KASHUBIAN","CZECH",
                 "LOWER_SORBIAN","UPPER_SORBIAN","SLOVAK","SLOVENIAN","MACEDONIAN","BULGARIAN","UKRAINIAN",
                 "BELARUSIAN","RUSSIAN"]:
            c = "blue"
            plt.annotate(p,(emb[0],emb[1]),color=c)
        if p in ["ARABIC_CYPRIOT_SPOKEN","ARABIC_GULF_SPOKEN","ARABIC_LIBYAN_SPOKEN","ARABIC_NORTH_LEVANTINE_SPOKEN",
                 "ARABIC_SUDANESE_SPOKEN","CAIRO_ARABIC","DAMASCUS_ARABIC"]:
            c = "green"
            plt.annotate(p,(emb[0],emb[1]),color=c)
Example #6
def test_python_to_c_to_python():
    """" Cycle visibility bindings from python to C to python. """
    from pandas  import DataFrame
    from numpy import sqrt
    from numpy.random import random
    from numpy.testing import assert_allclose, assert_equal
    from purify.tests.visibility_testing import _bindings_cycle

    N = 10
    noise = random(N)
    expected = DataFrame({
        'u': random(N), 'v': random(N), 'w': random(N),
        'noise': (1+1j) / sqrt(2) * noise, 'y': random(N) + 1j * random(N)
    })

    actual = _bindings_cycle(expected)

    assert_equal(set(actual.keys()), set(expected.keys()))
    for name in expected.keys():
        assert_allclose(actual[name], expected[name],
                        err_msg="Columns %s did not compare" % name)
Example #7
    def dump(self):
        chip_id_full = '999:chip_id_full'
        data = DataFrame(self.PFTR_nd).transpose()
        keys = data.keys()
        key_hard_bin = ''.join(filter(lambda s: 'HARD_BIN' in s, keys))
        key_site_num = ''.join(filter(lambda s: 'SITE_NUM' in s, keys))
        key_soft_bin = ''.join(filter(lambda s: 'SOFT_BIN' in s, keys))
        key_efuse_burned = ''.join(filter(lambda s: 'efuse_burned' in s, keys))
        key_chip_id_part0 = ''.join(filter(lambda s: 'chip_id_part0' in s, keys))
        key_chip_id_part1 = ''.join(filter(lambda s: 'chip_id_part1' in s, keys))

        for i in data[key_soft_bin].unique():
            self.test_yield[i] = {}
            # basename = os.path.basename(self.Path_name)
            # name_front = basename.find('_') + 1
            # name_end = basename.find('---') - 12
            # name = basename[name_front:name_end]
            # self.test_yield[i][name] = {}
            # for j in data[key_site_num].unique():
            #     self.test_yield[i][name][j] = data[(data[key_site_num] == j) & (data[key_soft_bin] == i)][
            #         key_hard_bin].count()
            #     self.test_yield[i][name]['total'] = data[(data[key_soft_bin] == i)][key_hard_bin].count()
            #     self.test_yield[i][name]['yield'] = '{0:.2%}'.format(
            #         self.test_yield[i][name]['total'] / float(data[key_hard_bin].count()))
            # df_1 = DataFrame(self.test_yield).transpose()
            # dfs = [pd.DataFrame([x for x in df_1[col]], index=df_1.index) for col in df_1.columns]
            # df2 = pd.concat(dfs, axis=1, keys=df_1.columns)
            # df2.columns.names = ['test', 'info']
            # df2.index.names = ['soft_bin']

            for j in data[key_site_num].unique():
                self.test_yield[i][j] = data[(data[key_site_num] == j) & (data[key_soft_bin] == i)][
                    key_hard_bin].count()
                self.test_yield[i]['total'] = data[(data[key_soft_bin] == i)][key_hard_bin].count()
                self.test_yield[i]['yield'] = '{0:.2%}'.format(
                    self.test_yield[i]['total'] / float(data[key_hard_bin].count()))
        df_1 = DataFrame(self.test_yield).transpose()

        data[chip_id_full] = data[key_chip_id_part0] + data[key_chip_id_part1] * 10000000
        data_id = data[(data[key_hard_bin] == 1) & (data[key_efuse_burned] == 0)][chip_id_full]
        if data_id.duplicated().any():
            with open(self.Path_name + 'duplicated.txt', 'w') as duplicated_txt:
                duplicated_txt.write(self.Path_name + ' is with duplicated chip id')
            raise ValueError(self.Path_name + ' is with duplicated chip id')
        with ExcelWriter(self.Path_name) as writer:
            DataFrame(self.test_info).to_excel(writer, sheet_name='Related')
            DataFrame(self.PMR_nd).transpose().to_excel(writer, sheet_name='PMR')
            DataFrame(self.PFTR_nd).transpose().to_excel(writer, sheet_name='PTR_FTR')
            DataFrame(
                data[(data[key_hard_bin] == 1) & (data[key_efuse_burned] == 0)]).describe().transpose().combine_first(
                DataFrame(self.spec_summ).transpose()).to_excel(writer, sheet_name='summary_spec')
            df_1.to_excel(writer, sheet_name='yield')
Example #8
import numpy as np
import pandas as pd
from pandas import DataFrame


def resample_df(original_df, rs_interval='60Min', rs_how='last',
                window_size=4):
    # resample (modern pandas: .resample(...).agg(how) replaces the removed how= argument)
    rs = original_df.resample(rs_interval).agg(rs_how)
    df = DataFrame(rs)
    df = df[pd.notnull(df).any(axis=1)]  # remove all-NaN rows

    # add lagged window columns
    for k in df.keys():
        for ind in range(1, window_size):
            vn = str(k) + '-' + str(ind)
            df[vn] = np.hstack((np.array([np.nan] * ind),
                                df[k].values))[:-ind]

    # destroy first lines
    df = df[window_size - 1:]  # this -1 is destroyed later

    return df
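A hedged usage sketch: a synthetic minute-level series resampled to hourly windows; the column name and index are assumptions.

import numpy as np
import pandas as pd

idx = pd.date_range('2020-01-01', periods=360, freq='min')      # 6 hours of minute data
raw = pd.DataFrame({'price': np.random.randn(360).cumsum()}, index=idx)
lagged = resample_df(raw, rs_interval='60Min', rs_how='last', window_size=4)
print(list(lagged.columns))   # ['price', 'price-1', 'price-2', 'price-3']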
Example #9
import pandas as pd
import peakutils


def sensors_estimation(
    signal_data: pd.DataFrame, sensors_delta_distance: list
) -> list:
    """
    Estimate the speed measured between consecutive sensors.

    :param signal_data: DataFrame of sensor voltages indexed by time.
    :param sensors_delta_distance: distances between consecutive sensors.
    :return: list of per-sensor speed estimates (distance / peak-time delta).
    """
    # x axis: time
    x = signal_data.index.values

    sensors_peak_time = []
    sensors_delta_time = [None]

    for k in signal_data.keys():
        # y axis: volts
        y = signal_data[k].values

        indexes = peakutils.indexes(y, thres=0.5, min_dist=30)

        sensors_peak_time.append(x[indexes])

    for i in range(1, len(sensors_peak_time)):
        sensors_delta_time.append(
            sensors_peak_time[i] - sensors_peak_time[i - 1]
        )

    # the information about first sensor should be equal to the second sensor
    sensors_delta_time[0] = sensors_delta_time[1]

    sensors_delta_speed = []

    for i in range(len(sensors_delta_distance)):
        sensors_delta_speed.append(
            sensors_delta_distance[i] / sensors_delta_time[i]
        )

    # the information about first sensor should be equal to the second sensor
    sensors_delta_speed[0] = sensors_delta_speed[1]

    return sensors_delta_speed
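A hedged usage sketch with two synthetic sensor channels; the sensor spacing and pulse timing below are illustrative assumptions (peakutils must be installed).

import numpy as np
import pandas as pd

t = np.linspace(0.0, 1.0, 2000)                                  # seconds
pulse = lambda centre: np.exp(-((t - centre) / 0.005) ** 2)      # narrow synthetic peak
signals = pd.DataFrame({'s0': pulse(0.40), 's1': pulse(0.45)}, index=t)
speeds = sensors_estimation(signals, sensors_delta_distance=[1.0, 1.0])
print(speeds)   # about 20 (m/s) for 1.0 m spacing and a 0.05 s peak delay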
Example #10
from csv import reader

from pandas import DataFrame


def agg_by_state(df):
    
    '''Aggregate data by US state, summing all relevant metrics'''
    
    # Define lambda functions for aggregation
    count_user = lambda x: sum(x == 'user')
    count_hash = lambda x: sum(x == 'hash')
    count_none = lambda x: sum(x == 'none')
    count_user_hash = lambda x: (count_user(x) / count_hash(x)) \
                                if count_hash(x) > 0 else 0
    
    # Create an aggregation dictionary
    agg_dict = {'count': len, 'n_user': count_user, 'n_hash': count_hash,
                'n_none': count_none, 'user_hash': count_user_hash}

    # Perform aggregation by state (named aggregation for modern pandas)
    grouped = df.groupby(by='state', as_index=False)
    df = grouped['u_o_h'].agg(**agg_dict)

    # Load state data (note the raw string for the Windows path)
    with open(r'J:\WDPRO\BPM\us_states.csv', 'r') as f:
        states = {}
        for abbrev, name in reader(f):
            states[abbrev] = name
    states = DataFrame(data=list(states.values()), index=list(states.keys()))
    
    # Restrict results to US states
    df = df[df.state.isin(states.index)]
    
    # Join the full state name
    df = df.join(states, on='state')
    df.rename(columns={0: 'state_name'}, inplace=True)
    df['state_name'] = [i.lower() for i in df['state_name']]
    
    # Rank the states
    df['count_rank'] = df['count'].rank(ascending=False)
    
    # Return DataFrame
    return df
Example #11
from pandas import DataFrame


def _get_cols_with_nans(in_data: DataFrame):
    for col_name in in_data.keys():
        if in_data[col_name].hasnans:
            yield col_name
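A hedged usage sketch for the generator above; the toy frame is an assumption.

import numpy as np
from pandas import DataFrame

frame = DataFrame({'a': [1.0, np.nan, 3.0], 'b': [1.0, 2.0, 3.0]})
print(list(_get_cols_with_nans(frame)))   # ['a']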
Example #12
table = xls_file.parse('Sheet1')  # xls_file: a pandas.ExcelFile opened earlier in the tutorial

#******************************************
# Analyze HTML/JSON data using the API the site provides (p. 181)

import requests
url='http://live.qq.com/json/movie/all/hot2/list_7.json'

resp=requests.get(url)

resp

import json
data=json.loads(resp.text)

data.keys()

#************************************************
# Interacting with databases
# Using the embedded SQLite database with pandas

import  sqlite3
query="""
CREATE TABLE test
(a VARCHAR(20),b VARCHAR (20),
c REAL  ,d INTEGER );"""
con=sqlite3.connect(':memory:')
con.execute(query)
con.commit()
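A hedged continuation sketch: the sample rows and the read-back step below are assumptions, not part of the original tutorial text.

rows = [('Atlanta', 'Georgia', 1.25, 6),
        ('Tallahassee', 'Florida', 2.6, 3)]
con.executemany('INSERT INTO test VALUES (?, ?, ?, ?)', rows)
con.commit()

import pandas as pd
frame = pd.read_sql('SELECT * FROM test', con)   # read the table back as a DataFrame
print(frame)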

Example #13
import h5py
import numpy as np
import pandas as pd
from datetime import datetime


def create_data_set(
    data_file: h5py.File,
    data: pd.DataFrame,
    sample_rate: int=None,
    date_time: datetime=datetime.now(),
    site_id: str='000',
    lane_id: str='00',
    temperature: float=None,
    license_plate: str=None,
    sensor_calibration_factory: list=None,
    distance_between_sensors: list=None,
    sensor_type: str=None,
    sensors_layout: str=None,
    channel_configuration: str=None,
    **kwargs
) -> h5py.Dataset:
    """

    :param data_file:
    :param data:
    :param sample_rate: (e.g. 2000)
    :param date_time: (e.g. 2017-04-04 00:49:36)
    :param site_id: (e.g. 001)
    :param lane_id: (e.g. 01)
    :param temperature: (e.g. 28.5)
    :param license_plate: (e.g. AAA9999)
    :param sensor_calibration_factory: (e.g. [0.98, 0.99, 0.75])
    :param distance_between_sensors: (e.g. [1.0, 1.5, 2.0])
    :param sensor_type: (e.g. quartz, polymer, ceramic, mixed)
    :param sensors_layout: (e.g. |/|\|<|>|=|)
    :param channel_configuration: (this is an optional attribute; it is
        required only when sensor_type is mixed,
        e.g. "{'a0': 'polymer', 'a1': 'ceramic'}")
    :param kwargs:
    :return:
    """

    dset_id = 'run_{}_{}_{}'.format(
        site_id, lane_id, date_time.strftime('%Y%m%d_%H%M%S')
    )

    dset = data_file.create_dataset(
        dset_id, shape=(data.shape[0],),
        dtype=np.dtype([
            (k, float) for k in ['index'] + list(data.keys())
        ])
    )

    dset['index'] = data.index

    for k in data.keys():
        dset[k] = data[k]

    dset.attrs['sample_rate'] = sample_rate
    dset.attrs['date_time'] = date_time.strftime('%Y-%m-%d %H:%M:%S')
    dset.attrs['site_id'] = site_id
    dset.attrs['lane_id'] = lane_id
    dset.attrs['temperature'] = temperature
    dset.attrs['license_plate'] = license_plate
    dset.attrs['sensor_calibration_factory'] = sensor_calibration_factory
    dset.attrs['distance_between_sensors'] = distance_between_sensors
    dset.attrs['sensor_type'] = sensor_type
    dset.attrs['sensors_layout'] = sensors_layout
    dset.attrs['channel_configuration'] = channel_configuration

    if kwargs:
        for k, v in kwargs.items():
            dset.attrs[k] = v

    return dset
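A hedged usage sketch for create_data_set(); every value below is an illustrative assumption, and all metadata arguments are supplied explicitly because h5py cannot store None in dataset attributes.

import h5py
import numpy as np
import pandas as pd
from datetime import datetime

signals = pd.DataFrame({'a0': np.random.rand(100), 'a1': np.random.rand(100)})
with h5py.File('wim_run.h5', 'w') as f:            # hypothetical file name
    dset = create_data_set(
        f, signals,
        sample_rate=2000,
        date_time=datetime(2017, 4, 4, 0, 49, 36),
        site_id='001', lane_id='01',
        temperature=28.5, license_plate='AAA9999',
        sensor_calibration_factory=[0.98, 0.99, 0.75],
        distance_between_sensors=[1.0, 1.5, 2.0],
        sensor_type='quartz', sensors_layout='|/|\\|<|>|=|',
        channel_configuration='n/a')
    print(dset.name, dict(dset.attrs))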
Example #14
    from pandas import DataFrame as DF
    import sys

    ## this stuff should be moved to a unit test
    ## (np, pe -- presumably nipype.pipeline.engine -- and CSVFile are defined in the surrounding module)
    if "--test" in sys.argv:


        ## generate some random csv datastructure
        import faker, random, tempfile
        FK = faker.Faker()
        ncol = random.randint(1, 5)
        nrow = random.randint(100, 400)

        df = DF(dict([(key, np.random.rand(1, nrow)[0]) for key in [FK.username() for i in range(ncol-1)] + ["RT"]]))
        for k in df.keys():
            if k == "RT": continue
            if random.random() > 0.5:
                ## turn the column into a binary value
                df[k] = df[k].round()

        TR_duration = 0.5 * random.randint(2, 6)
        ## append a duration and onset
        df['duration'] = TR_duration
        df['onset'] = df.index * TR_duration

        csv_filepath = tempfile.mktemp(suffix = ".csv")
        df.to_csv(csv_filepath)

        csvf = pe.Node(name = "csvfile", interface = CSVFile())
        csvf.inputs.csv_filepath = csv_filepath
Example #15
import codecs
import numpy as np
from pandas import DataFrame
from sklearn.neighbors import KNeighborsClassifier  # assumed: scikit-learn
import regex
import evaluation
import sys
phoneme_embeddings = dict()
for line in codecs.open("phoneme_embeddings_plm.csv","r"):
    line = line.split(",")
    key= line[0][1:-1]
    emb = line[1:]
    emb[-1] = emb[-1][:-1]
    emb = np.array([float(e) for e in emb])
    phoneme_embeddings[key] = emb

phoneme_embeddings = DataFrame(phoneme_embeddings,columns=phoneme_embeddings.keys())

clf = KNeighborsClassifier(n_neighbors=1, algorithm="brute", metric="euclidean")
clf.fit(phoneme_embeddings.transpose(), phoneme_embeddings.columns)

tags = set()
for test in evaluation.SimilarityTestData:
    for tag in test["tags"]:
        tags.add(tag)
tags = list(tags)
print("EVALUATION")
c = dict()
c_true = dict()
c_true_all = 0
for tag in tags:
    c[tag] = 0
Example #16
    def addData(self,other,
                sameIndex=True,
                mergeIndex=False,
                prefix=None,
                suffix=None,
                allowExtrapolate=False,
                interpolationMethod="values"):
        """Add data from another DataFrame or Series
        @param other: data as Pandas-DataFrame or Series
        @param sameIndex: assum both have the same indices. If False the other data will be interpolated to the current indices
        @param mergeIndex: make the result indices a mixture of the indices"""
        if not sameIndex and mergeIndex:
            raise PandasWrapperPyFoamException("Can't specify sameIndex=False and mergeIndex=True at the same time")
        if not isinstance(other,self.validOtherTypes):
             raise PandasWrapperPyFoamException("Other data is of type",type(other),
                                                "should be one of",self.validOtherTypes)
        if isinstance(other,DataFrame):
             o=other
        else:
             o=DataFrame(other)

        k=o.keys()
        if not self.__allStrings(k):
            raise PandasWrapperPyFoamException("Added data with non-string columns")
        v=k.copy()
        if prefix:
             v=[prefix+n for n in v]
        if suffix:
             v=[n+suffix for n in v]
        if len(set(v)&set(self.keys()))>0:
             raise PandasWrapperPyFoamException("Keys of this",self.keys(),"and other",v,
                                                "intersect",set(v)&set(self.keys()))
        keys=dict(zip(k,v))
        interpolate=False # only interpolate if necessary
        if len(self.index)!=len(o.index) or (self.index!=o.index).any():
             if sameIndex and not mergeIndex:
                  raise PandasWrapperPyFoamException("Other data has different index. Specify sameIndex=False or mergeIndex=True")
             ni=unique(hstack([self.index,o.index]))
             interpolate=True
             if mergeIndex:
                 minOld=min(self.index)
                 maxOld=max(self.index)

                 result=self.reindex(index=ni,copy=False).interpolate(
                    method=interpolationMethod)

                 if not allowExtrapolate:
                     result[result.index<minOld]=float("NaN")
                     result[result.index>maxOld]=float("NaN")
             else:
                  # make sure we have values at the current position
#                  o=o.reindex_axis(ni,axis='index').interpolate(method=interpolationMethod)
                  o=o.reindex(index=ni,columns=o.columns).interpolate(method=interpolationMethod)
                  # ,takeable=True
                  result=self.copy()
        else:
            result=self.copy()

        minOld=min(o.index)
        maxOld=max(o.index)
        for k,v in keys.items():
            result[v]=o[k]
            if interpolate:
                result[v]=result[v].interpolate(method=interpolationMethod)
                if not allowExtrapolate:
                     result[v][result.index<minOld]=float("NaN")
                     result[v][result.index>maxOld]=float("NaN")

        return PyFoamDataFrame(result)
Example #17
def _assemble_from_unit_mappings(arg, errors):
    """
    assemble the unit specified fields from the arg (DataFrame)
    Return a Series for actual parsing

    Parameters
    ----------
    arg : DataFrame
    errors : {'ignore', 'raise', 'coerce'}, default 'raise'

        - If 'raise', then invalid parsing will raise an exception
        - If 'coerce', then invalid parsing will be set as NaT
        - If 'ignore', then invalid parsing will return the input

    Returns
    -------
    Series
    """
    from pandas import to_timedelta, to_numeric, DataFrame
    arg = DataFrame(arg)
    if not arg.columns.is_unique:
        raise ValueError("cannot assemble with duplicate keys")

    # replace passed unit with _unit_map
    def f(value):
        if value in _unit_map:
            return _unit_map[value]

        # m is case significant
        if value.lower() in _unit_map:
            return _unit_map[value.lower()]

        return value

    unit = {k: f(k) for k in arg.keys()}
    unit_rev = {v: k for k, v in unit.items()}

    # we require at least Ymd
    required = ['year', 'month', 'day']
    req = sorted(list(set(required) - set(unit_rev.keys())))
    if len(req):
        raise ValueError("to assemble mappings requires at least that "
                         "[year, month, day] be specified: [{required}] "
                         "is missing".format(required=','.join(req)))

    # keys we don't recognize
    excess = sorted(list(set(unit_rev.keys()) - set(_unit_map.values())))
    if len(excess):
        raise ValueError("extra keys have been passed "
                         "to the datetime assemblage: "
                         "[{excess}]".format(excess=','.join(excess)))

    def coerce(values):
        # we allow coercion to if errors allows
        values = to_numeric(values, errors=errors)

        # prevent overflow in case of int8 or int16
        if is_integer_dtype(values):
            values = values.astype('int64', copy=False)
        return values

    values = (coerce(arg[unit_rev['year']]) * 10000 +
              coerce(arg[unit_rev['month']]) * 100 +
              coerce(arg[unit_rev['day']]))
    try:
        values = to_datetime(values, format='%Y%m%d', errors=errors)
    except (TypeError, ValueError) as e:
        raise ValueError("cannot assemble the "
                         "datetimes: {error}".format(error=e))

    for u in ['h', 'm', 's', 'ms', 'us', 'ns']:
        value = unit_rev.get(u)
        if value is not None and value in arg:
            try:
                values += to_timedelta(coerce(arg[value]),
                                       unit=u,
                                       errors=errors)
            except (TypeError, ValueError) as e:
                raise ValueError("cannot assemble the datetimes [{value}]: "
                                 "{error}".format(value=value, error=e))

    return values
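For reference, this helper backs the DataFrame/dict path of the public pd.to_datetime; a short illustration of that user-facing behaviour (the values are arbitrary):

import pandas as pd

parts = pd.DataFrame({'year': [2015, 2016],
                      'month': [2, 3],
                      'day': [4, 5],
                      'hour': [10, 11]})
print(pd.to_datetime(parts))
# 0   2015-02-04 10:00:00
# 1   2016-03-05 11:00:00
# dtype: datetime64[ns]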
Example #18
from pandas import DataFrame
import pandas as pd
import matplotlib
matplotlib.use('Agg')
from matplotlib import pyplot as plt
import seaborn as sns

dir_cm = ''
dir_unmod = ''
fname = "chin_alpha_gamma.pk"
df_cm = DataFrame(pd.read_pickle(dir_cm + fname))
df_unmod = DataFrame(pd.read_pickle(dir_unmod + fname))

keys = df_cm.keys()

for key in keys:
    fig = plt.figure()
    ax = fig.add_subplot(121)
    ax.hist(df_cm[key], density=True)
    ax.set_xlim([-180., 180.])
    ax.set_ylim([0, 0.1])
    ax = fig.add_subplot(122)
    ax.hist(df_unmod[key], density=True)
    ax.set_ylim([0, 0.1])
    ax.set_xlim([-180., 180.])
    fname = key.replace(":", "_")
    plt.title(fname)
    plt.savefig("./plots/" + fname + ".png", dpi=300)