Example #1
0
    def test_dataframe_duplicate_column_names(self):
        """Frames with repeated column labels survive ``self.encode_decode``.

        Three frames are built (an empty one with two 'a' columns, one with
        a hundred columns all labelled 1, and a two-column one mixing a
        string with NaN), each is passed through ``self.encode_decode``,
        and every result is compared against the frame it came from.
        """
        # GH 9618
        empty_dup = DataFrame(columns=['a', 'a'])

        wide_dup = DataFrame(columns=[1]*100)
        wide_dup.loc[0] = np.random.randn(100)

        mixed_dup = DataFrame(columns=[1, 1])
        mixed_dup.loc[0] = ['abc', np.nan]

        originals = (empty_dup, wide_dup, mixed_dup)

        # Round-trip all frames before running any comparison.
        round_tripped = [self.encode_decode(frame) for frame in originals]

        for actual, wanted in zip(round_tripped, originals):
            assert_frame_equal(actual, wanted)
    def sample(self, start_state=None, size=1):
        """
        Generate a trajectory of states from the Markov Chain.

        Parameters:
        -----------
        start_state: dict or array-like iterable
            Starting states of the variables; handed to `set_start_state`.
            If None, the state already stored on the model is used, and a
            random one is chosen only when no state has been set yet.
        size: int
            Number of samples to be generated.

        Return Type:
        ------------
        pandas.DataFrame
            Indexed by range(size) with one column per model variable.
            Row 0 is the starting state.

        Notes:
        ------
        `self.state` is updated in place, so after the call it holds the
        last state that was sampled.

        Examples:
        ---------
        >>> from pgmpy.models import MarkovChain as MC
        >>> from pgmpy.factors.discrete import State
        >>> model = MC(['intel', 'diff'], [2, 3])
        >>> model.set_start_state([State('intel', 0), State('diff', 2)])
        >>> intel_tm = {0: {0: 0.25, 1: 0.75}, 1: {0: 0.5, 1: 0.5}}
        >>> model.add_transition_model('intel', intel_tm)
        >>> diff_tm = {0: {0: 0.1, 1: 0.5, 2: 0.4}, 1: {0: 0.2, 1: 0.2, 2: 0.6 }, 2: {0: 0.7, 1: 0.15, 2: 0.15}}
        >>> model.add_transition_model('diff', diff_tm)
        >>> model.sample(size=5)
           intel  diff
        0      0     2
        1      1     0
        2      0     1
        3      1     0
        4      0     2
        """
        if start_state is not None:
            self.set_start_state(start_state)
        elif self.state is None:
            self.state = self.random_state()
        # otherwise keep the previously-set state

        trajectory = DataFrame(index=range(size), columns=self.variables)
        trajectory.loc[0] = [value for _, value in self.state]

        # For every (variable, current state) pair, draw `size` candidate
        # successors up front; step k of the walk below consumes entry k - 1.
        # NOTE(review): sample_discrete is presumably a weighted draw over
        # `outcomes` using `weights` as probabilities -- confirm.
        draws = defaultdict(dict)
        for name, model in self.transition_models.items():
            for origin in model:
                transitions = model[origin]
                outcomes = list(transitions.keys())
                weights = list(transitions.values())
                draws[name][origin] = sample_discrete(outcomes, weights, size=size)

        for step in range(1, size):
            for pos, (name, current) in enumerate(self.state):
                # Overwrite the slot in place so the list object is reused.
                self.state[pos] = State(name, draws[name][current][step - 1])
            trajectory.loc[step] = [value for _, value in self.state]

        return trajectory
Example #3
0
    def test_partial_setting_mixed_dtype(self):
        """Exercise row enlargement via ``.loc`` on mixed and empty frames.

        Covers: appending a row to a bool/int frame, assigning a Series
        whose labels match none / one of the columns, assigning a list of
        the wrong length (must raise ValueError), and assigning a list of
        the right length to an empty frame.
        """

        # in a mixed dtype environment, try to preserve dtypes
        # by appending
        frame = DataFrame([[True, 1], [False, 2]], columns=["female", "fitness"])

        last_row = frame.loc[1].copy()
        last_row.name = 2
        appended = frame.append(last_row)

        frame.loc[2] = frame.loc[1]
        tm.assert_frame_equal(frame, appended)

        # columns will align (no label of the Series matches a column)
        frame = DataFrame(columns=['A', 'B'])
        frame.loc[0] = Series(1, index=range(4))
        all_missing = DataFrame(columns=['A', 'B'], index=[0])
        tm.assert_frame_equal(frame, all_missing)

        # columns will align (only 'B' is supplied)
        frame = DataFrame(columns=['A', 'B'])
        frame.loc[0] = Series(1, index=['B'])

        only_b = DataFrame([[np.nan, 1]], columns=['A', 'B'],
                           index=[0], dtype='float64')
        tm.assert_frame_equal(frame, only_b)

        # list-like must conform
        frame = DataFrame(columns=['A', 'B'])

        with pytest.raises(ValueError):
            frame.loc[0] = [1, 2, 3]

        # TODO: #15657, these are left as object and not coerced
        frame = DataFrame(columns=['A', 'B'])
        frame.loc[3] = [6, 7]

        as_object = DataFrame([[6, 7]], index=[3], columns=['A', 'B'],
                              dtype='object')
        tm.assert_frame_equal(frame, as_object)
def load(filename):
    """
    Crop, resize and scale one image into a single-row feature matrix.

    The crop returned by ``cropper.getCrop(filename)`` is resized with
    OpenCV to ``PIX`` x ``PIX``, flattened in row-major order, divided by
    255 and cast to float32.

    Parameters:
    -----------
    filename:
        Passed unchanged to ``cropper.getCrop``.

    Return Type:
    ------------
    tuple (X, y)
        X is a float32 array with exactly one row containing the flattened
        pixel values; y is always None.

    If ``cropper.getCrop`` returns None, a message is printed and the
    process is terminated through ``sys.exit()`` (raises SystemExit).
    """
    img = cropper.getCrop(filename)
    if img is None:
        # Parenthesised single-argument form prints the same text on both
        # Python 2 and Python 3.
        print("No face could be detected")
        sys.exit()

    # use opencv to resize the crop to the fixed PIX x PIX size
    resized = cv2.resize(img, (PIX, PIX))

    # Serialise the pixels as space-separated text and parse them back into
    # a flat float array.
    # NOTE(review): this relies on each cell being a scalar, i.e. a
    # single-channel image -- confirm against cropper.getCrop.
    output = ' '.join(' '.join(str(cell) for cell in row) for row in resized)
    pixels = np.fromstring(output, sep=' ')

    # One sample -> one row. Dividing by 255 scales to [0, 1] assuming 8-bit
    # pixel values.
    X = pixels[np.newaxis, :] / 255.
    X = X.astype(np.float32)
    y = None
    return X, y