コード例 #1
0
ファイル: 2tab.py プロジェクト: maroy/FCP
def to_tab(is_training, is_raw, in_path, add_cover_type):
    """Convert a csv/data sample file into a tab-separated .tab file.

    Reads `in_path` into a SampleSet, optionally overwrites each sample's
    Cover_Type with the label from perfectSubmission.csv, then writes all
    samples next to the input (csv/data extension replaced with tab).

    Relies on the module-level `columns` table of column specs (first
    element is the sample attribute name, the rest are header rows); note
    it mutates that global when the label column is dropped.
    """
    out_path = re.sub("csv$|data$", "tab", in_path)

    sample_set = SampleSet(is_training, is_raw)
    sample_set.read(in_path)

    if add_cover_type:
        # BUG FIX: the file was opened in binary mode ('rb') while the
        # lines were split with a str separator, which raises TypeError
        # on Python 3. Open in text mode instead.
        perfection = {}
        with open('perfectSubmission.csv', 'r') as perfect:
            for line in perfect:
                fields = line.strip().split(',')
                perfection[fields[0]] = fields[1]

        for sample in sample_set:
            sample.Cover_Type = perfection[str(sample.Id)]

    if not (is_training or add_cover_type):
        # Unlabeled prediction data: drop the trailing label column.
        del columns[-1]

    with open(out_path, 'w') as f:
        # Three header rows, one per metadata field of each column spec.
        for i in range(3):
            f.write("\t".join(col[i] for col in columns) + "\n")

        for sample in sample_set:
            values = [getattr(sample, col[0]) for col in columns]
            f.write("\t".join(str(v) for v in values) + "\n")
コード例 #2
0
ファイル: 2tab.py プロジェクト: maroy/FCP
def to_tab(is_training, is_raw, in_path, add_cover_type):
    """Convert a csv/data sample file into a tab-separated .tab file.

    Reads `in_path` into a SampleSet, optionally overwrites each sample's
    Cover_Type with the label from perfectSubmission.csv, then writes all
    samples next to the input (csv/data extension replaced with tab).

    Relies on the module-level `columns` table of column specs (first
    element is the sample attribute name, the rest are header rows); note
    it mutates that global when the label column is dropped.
    """
    out_path = re.sub("csv$|data$", "tab", in_path)

    sample_set = SampleSet(is_training, is_raw)
    sample_set.read(in_path)

    if add_cover_type:
        # BUG FIX: the file was opened in binary mode ('rb') while the
        # lines were split with a str separator, which raises TypeError
        # on Python 3. Open in text mode instead.
        perfection = {}
        with open('perfectSubmission.csv', 'r') as perfect:
            for line in perfect:
                fields = line.strip().split(',')
                perfection[fields[0]] = fields[1]

        for sample in sample_set:
            sample.Cover_Type = perfection[str(sample.Id)]

    if not (is_training or add_cover_type):
        # Unlabeled prediction data: drop the trailing label column.
        del columns[-1]

    with open(out_path, 'w') as f:
        # Three header rows, one per metadata field of each column spec.
        for i in range(3):
            f.write("\t".join(col[i] for col in columns) + "\n")

        for sample in sample_set:
            values = [getattr(sample, col[0]) for col in columns]
            f.write("\t".join(str(v) for v in values) + "\n")
コード例 #3
0
 def test_probability_calc_one_column(self):
     """Joint probabilities over a single column match the fixture counts."""
     frame = pd.read_csv('test_files/test_probability_calc.csv', dtype=np.str)
     probs = SampleSet(frame).probability(['a'])
     for cat, expected in [('catA', 0.3), ('catB', 0.25), ('catC', 0.45)]:
         self.assertEqual(float(probs.query('a=="%s"' % cat).joint_prob),
                          expected)
コード例 #4
0
 def test_a_b_given_c(self):
     """Conditional MI of a and b given c.

     The fixture holds 200 rows with c==1 (where MI(a,b) == 1) and
     100 rows with c==2 (where MI(a,b) == 0), so the weighted
     conditional MI is 2/3.
     """
     frame = pd.read_csv('test_files/test_conditional_mutual_information.csv',
                         dtype=np.str)
     result = SampleSet(frame).mutual_information(['a'], ['b'], ['c'])
     self.assertEqual(result['mi'], 0.6666666666666666)
コード例 #5
0
 def test_separate_categories(self):
     """mutual_information must raise when the variable groups overlap."""
     frame = pd.DataFrame([['1', '1'], ['2', '2'], ['3', '3']],
                          columns=list('ab'))
     samples = SampleSet(frame)
     with self.assertRaises(Exception):
         samples.mutual_information(['a', 'b'], ['b'])
コード例 #6
0
 def test_probability_calc_shuffled_columns(self):
     """Requesting columns out of file order still yields correct probs."""
     frame = pd.read_csv(
         'test_files/test_probability_calc_shuffled_columns.csv',
         dtype=np.str)
     probs = SampleSet(frame).probability(['b', 'a'])
     for query, expected in [('b=="1" and a=="catA"', 0.2),
                             ('b=="2" and a=="catA"', 0.1)]:
         self.assertEqual(float(probs.query(query).joint_prob), expected)
コード例 #7
0
    def test_a_b_mutual_information_declining(self):
        """Mutual information shrinks as uncorrelated noise rows are added.

        Start from 99 perfectly correlated rows (MI == log2(3)), then twice
        append 100 randomly generated rows and check MI strictly decreases
        each time.
        """
        # 33 repetitions of the three perfectly correlated row pairs.
        df = pd.DataFrame([['1', '1'], ['2', '2'], ['3', '3']] * 33,
                          columns=list('ab'))
        prev_mi = SampleSet(df).mutual_information(['a'], ['b'])['mi']
        self.assertAlmostEqual(prev_mi, np.log2(3))

        for _ in range(2):
            noise = pd.DataFrame(
                [[str(random.randint(1, 3)),
                  str(random.randint(1, 3))] for _ in range(100)],
                columns=list('ab'))
            df = df.append(noise, ignore_index=True)
            cur_mi = SampleSet(df).mutual_information(['a'], ['b'])['mi']
            self.assertTrue(cur_mi < prev_mi)
            prev_mi = cur_mi
コード例 #8
0
    def __init__(self):
        """Initialize an empty undirected graph with savepoint support."""
        self.__nodes__ = SampleSet()
        self.__edges__ = SampleSet()
        # Trait name -> attribute dict for nodes/edges.
        self.__traits__ = {}
        # Single-level savepoint state plus its undo log.
        self.__savepoint_set__ = False
        self.__savepoint_name__ = None
        self.__undo_log__ = []

        # node -> set of adjacent nodes.
        self.__neighbor_sets__ = {}
        # UndirectedDict is defined below in this file.
        self.__trait_dict_type__ = UndirectedDict
コード例 #9
0
    def __init__(self):
        """Initialize an empty directed graph with savepoint support."""
        self.__nodes__ = SampleSet()
        self.__edges__ = SampleSet()
        self.__traits__ = {}
        self.__neighbor_sets__ = {}

        self.__savepoint_set__ = False
        self.__savepoint_name__ = None
        self.__undo_log__ = []
        # Directed-only bookkeeping: per-direction adjacency, and plain
        # dict traits (no undirected key normalization needed).
        self.__in_neighbor_sets__ = {}
        self.__out_neighbor_sets__ = {}
        self.__trait_dict_type__ = dict
コード例 #10
0
    def test_mutual_information_significance_extreme(self):
        """Type I error rate for independent, highly skewed A and B.

        Generating A and B independently, the fraction of trials whose
        p-value exceeds the significance level should be close to
        (1 - significance).
        """
        trials = 10
        significance = 0.95
        errors = 0
        for _ in range(trials):
            frame = self.generate_random_a_b(100000, [0.01], [0.99])
            p_val = SampleSet(frame).mutual_information(
                ['a'], ['b'], debug=True)['p_val']
            if p_val > significance:
                errors += 1

        std = np.sqrt(significance * (1 - significance) / trials)
        self.assertAlmostEqual(1 - significance, errors / trials,
                               delta=3 * std)
コード例 #11
0
ファイル: test_graph.py プロジェクト: ranmoshe/Inference
    def test_r1(self):
        """IC*.Step3.R1: given A -> C, B -> C and C - D, mark C -*> D."""
        n = 100000
        # Same generation order (A, B, C, D) as independent series.
        df = pd.DataFrame({
            name: generate_random_series(n, name, [0.2])
            for name in 'ABCD'
        })
        graph = IC_Graph(SampleSet(df))
        graph.graph.add_edges_from([('A', 'C'), ('B', 'C')], out='C')
        graph.graph.add_edge('C', 'D')
        graph.ic_step_3_r1()
        directed = [t for t in graph.graph.edges.data('out')
                    if t[2] is not None]
        starred = [t for t in graph.graph.edges.data('out_star')
                   if t[2] is not None]

        self.assertEqual([('A', 'C', 'C'), ('B', 'C', 'C')], directed)
        self.assertEqual([('C', 'D', 'D')], starred)
        self.assertEqual(len(graph.graph.edges), 3)
コード例 #12
0
ファイル: test_graph.py プロジェクト: ranmoshe/Inference
    def test_common_cause(self):
        """Season drives both temperature and flu epidemics.

        Causal direction is not identifiable from this data, so expect
        an undirected fork at 'season'.
        """
        seasons = generate_random_series(100000, 'season',
                                         [0.35, 0.15, 0.35])
        df = pd.DataFrame({'season': seasons})
        names = ['winter', 'spring', 'summer', 'fall']
        df['season'] = df['season'].map(
            {'season_%d' % i: name for i, name in enumerate(names)})
        df['temperature'] = df['season'].apply(self.season_to_temperature)
        df['flu_epidemic'] = df['season'].apply(self.season_to_flu_epidemic)
        graph = IC_Graph(SampleSet(df))
        graph.build_graph()
        directed = [t for t in graph.graph.edges.data('out')
                    if t[2] is not None]

        self.assertEqual([], directed)
        self.assertEqual([('season', 'temperature'),
                          ('season', 'flu_epidemic')],
                         [t for t in graph.graph.edges])
コード例 #13
0
    def test_mutual_information_significance_with_conditional(self):
        """Type I error rate with a conditioning variable.

        A and B are generated independently within each category of C;
        the fraction of trials whose p-value exceeds the significance
        level should be close to (1 - significance).
        """
        trials = 10
        significance = 0.95
        errors = 0
        for _ in range(trials):
            combined = pd.DataFrame({'a': [], 'b': [], 'c': []})
            for offset in [0, 5, 10]:
                part = self.generate_random_a_b(
                    10000, [0.05 + offset], [0.95 - offset])
                part['c'] = offset
                combined = combined.append(part)
            p_val = SampleSet(combined).mutual_information(
                ['a'], ['b'], ['c'], debug=True)['p_val']
            if p_val > significance:
                errors += 1

        std = np.sqrt(significance * (1 - significance) / trials)
        self.assertAlmostEqual(1 - significance, errors / trials,
                               delta=3 * std)
コード例 #14
0
    def test_mutual_information_vs_categories(self):
        """MI of perfectly correlated columns equals log2(#categories)."""
        fixtures = (
            [['1', '1'], ['2', '2'], ['3', '3']],       # 3 categories
            [[str(i), str(i)] for i in range(100)],     # 100 categories
        )
        for rows in fixtures:
            frame = pd.DataFrame(rows, columns=list('ab'))
            mi = SampleSet(frame).mutual_information(['a'], ['b'])['mi']
            self.assertAlmostEqual(mi, np.log2(len(rows)))
コード例 #15
0
ファイル: test_graph.py プロジェクト: ranmoshe/Inference
    def test_3_causes_structure(self):
        """Three independent visitors each may trigger a page click.

        Expect a v-structure with all three user edges directed into
        'click'.
        """
        n = 100000
        # Same generation order (A, B, C) as independent series.
        df = pd.DataFrame({name: generate_random_series(n, name, [0.01])
                           for name in 'ABC'})
        df['click'] = df.apply(lambda row: self.is_click(row),
                               axis='columns')
        graph = IC_Graph(SampleSet(df))
        graph.build_graph()
        directed = [t for t in graph.graph.edges.data('out')
                    if t[2] is not None]

        self.assertEqual([('A', 'click', 'click'), ('B', 'click', 'click'),
                          ('C', 'click', 'click')], directed)
        self.assertEqual(len(graph.graph.edges), 3)
コード例 #16
0
ファイル: test_graph.py プロジェクト: ranmoshe/Inference
    def test_v_structure(self):
        """Cat and owner independently determine the kitchen's state.

        The cat, left alone, messes up the kitchen; the owner tidies up
        and keeps the cat away. Expect both edges directed into
        'kitchen'.
        """
        df = generate_random_a_b(100000, [0.3], [0.2],
                                 a_name='cat',
                                 b_name='owner')
        for col in ('cat', 'owner'):
            df[col] = df[col].map({col + '_0': 'near', col + '_1': 'far'})
        df['kitchen'] = df.apply(lambda row: self.kitchen_state(row), axis=1)
        graph = IC_Graph(SampleSet(df))
        graph.build_graph()
        directed = [t for t in graph.graph.edges.data('out')
                    if t[2] is not None]

        self.assertEqual([('cat', 'kitchen', 'kitchen'),
                          ('owner', 'kitchen', 'kitchen')], directed)
        self.assertEqual(len(graph.graph.edges), 2)
コード例 #17
0
ファイル: test_graph.py プロジェクト: ranmoshe/Inference
    def test_3_node_chain(self):
        """dog -> cat -> mouse chase chain.

        Causal direction is not identifiable from this data, so expect
        an undirected chain dog - cat - mouse.
        """
        dogs = generate_random_series(100000, 'dog', [0.2])
        df = pd.DataFrame({'dog': dogs})
        df['dog'] = df['dog'].map({'dog_0': 'resting', 'dog_1': 'running'})
        df['cat'] = df['dog'].apply(
            lambda state: self.hunted_state(state, 0.8, 0.2))
        df['mouse'] = df['cat'].apply(
            lambda state: self.hunted_state(state, 0.8, 0.2))
        graph = IC_Graph(SampleSet(df))
        graph.build_graph()
        directed = [t for t in graph.graph.edges.data('out')
                    if t[2] is not None]

        self.assertEqual([], directed)
        self.assertEqual([('dog', 'cat'), ('cat', 'mouse')],
                         [t for t in graph.graph.edges])
コード例 #18
0
ファイル: test_graph.py プロジェクト: ranmoshe/Inference
    def test_one_starred_relation(self):
        """A and B cause C; C causes D.

        Expect A -> C and B -> C directed, and C -*> D starred.
        """
        n = 100000
        # Same generation order (A, B) as independent series.
        df = pd.DataFrame({name: generate_random_series(n, name, [0.1])
                           for name in 'AB'})
        df['C'] = df.apply(lambda row: self.A_B_dependency(row),
                           axis='columns')
        df['D'] = df.apply(lambda row: self.C_dependency(row),
                           axis='columns')
        graph = IC_Graph(SampleSet(df))
        graph.build_graph()
        directed = [t for t in graph.graph.edges.data('out')
                    if t[2] is not None]
        starred = [t for t in graph.graph.edges.data('out_star')
                   if t[2] is not None]

        self.assertEqual([('A', 'C', 'C'), ('B', 'C', 'C')], directed)
        self.assertEqual([('C', 'D', 'D')], starred)
        self.assertEqual(len(graph.graph.edges), 3)
コード例 #19
0
 def test_entropy(self):
     """Entropy of a constant column is 0, of a fair binary column is 1,
     and joining a constant column leaves the joint entropy unchanged."""
     samples = SampleSet(
         pd.read_csv('test_files/test_entropy.csv', dtype=np.str))
     for columns, expected in ((['c'], 0), (['a'], 1), (['c', 'a'], 1)):
         self.assertEqual(samples.entropy(columns), expected)
コード例 #20
0
 def test_a_b_mutual_info(self):
     """Fully dependent binary columns have MI of exactly 1 bit."""
     samples = SampleSet(
         pd.read_csv('test_files/test_mutual_information.csv',
                     dtype=np.str))
     self.assertEqual(samples.mutual_information(['a'], ['b'])['mi'], 1)
コード例 #21
0
class DirectedGraphData(GraphData):
    """Directed variant of GraphData.

    Edges are stored as raw (source, target) tuples (no min/max
    normalization), and per-direction in/out neighbor sets are kept in
    addition to the undirected __neighbor_sets__ used by the base class.
    """

    def __init__(self):
        """Initialize an empty directed graph with savepoint support."""
        self.__nodes__ = SampleSet()
        self.__edges__ = SampleSet()
        self.__traits__ = {}
        self.__neighbor_sets__ = {}

        self.__savepoint_set__ = False
        self.__savepoint_name__ = None
        self.__undo_log__ = []
        # Directed-only bookkeeping: per-direction adjacency, and plain
        # dict traits (no undirected key normalization needed).
        self.__in_neighbor_sets__ = {}
        self.__out_neighbor_sets__ = {}
        self.__trait_dict_type__ = dict

    def add_node(self, node):
        """Add `node` with empty undirected and in/out neighbor sets;
        no-op if the node already exists."""
        if node in self.__nodes__:
            return

        if self.__savepoint_set__:
            self.__undo_log__.append((GraphData.__ADD_NODE__, node))

        self.__nodes__.add(node)
        self.__neighbor_sets__[node] = set()
        # Directed bookkeeping.
        self.__in_neighbor_sets__[node] = set()
        self.__out_neighbor_sets__[node] = set()

    def delete_node(self, node):
        """Remove `node`, all incident edges, and its trait entries;
        no-op if the node does not exist."""
        if node not in self.__nodes__:
            return

        if self.__savepoint_set__:
            # Record the node together with every incident directed edge
            # so restore_to_savepoint() can rebuild them.
            self.__undo_log__.append((GraphData.__DEL_NODE__, node, \
                [(node, n) for n in self.__out_neighbor_sets__[node]] + \
                [(n, node) for n in self.__in_neighbor_sets__[node]]))

        for neighbor in self.__neighbor_sets__[node]:
            self.__neighbor_sets__[neighbor].remove(node)
        del self.__neighbor_sets__[node]
        self.__nodes__.remove(node)
        for trait, trait_dict in self.__traits__.items():
            if node in trait_dict:
                del trait_dict[node]
        # Drop the directed adjacency and the edge records themselves.
        for neighbor in self.__in_neighbor_sets__[node]:
            self.__out_neighbor_sets__[neighbor].remove(node)
            self.__edges__.remove((neighbor, node))
        for neighbor in self.__out_neighbor_sets__[node]:
            self.__in_neighbor_sets__[neighbor].remove(node)
            self.__edges__.remove((node, neighbor))

        del self.__in_neighbor_sets__[node]
        del self.__out_neighbor_sets__[node]

    def add_edge(self, source, target):
        """Add the directed edge source -> target; no-op if present.
        Both endpoints must already be nodes."""
        if (source, target) in self.__edges__:
            return

        if self.__savepoint_set__:
            self.__undo_log__.append(
                (GraphData.__ADD_EDGE__, (source, target)))

        self.__edges__.add((source, target))
        self.__neighbor_sets__[source].add(target)
        self.__neighbor_sets__[target].add(source)
        # Directed bookkeeping.
        self.__out_neighbor_sets__[source].add(target)
        self.__in_neighbor_sets__[target].add(source)

    def delete_edge(self, source, target):
        """Remove the directed edge source -> target and its trait
        entries; no-op if the edge does not exist."""
        if (source, target) not in self.__edges__:
            return

        if self.__savepoint_set__:
            self.__undo_log__.append(
                (GraphData.__DEL_EDGE__, (source, target)))

        # Only drop the undirected adjacency link when the reverse edge
        # is not also present.
        if (target, source) not in self.__edges__:
            self.__neighbor_sets__[source].remove(target)
            self.__neighbor_sets__[target].remove(source)
        self.__in_neighbor_sets__[target].remove(source)
        self.__out_neighbor_sets__[source].remove(target)
        edge = (source, target)
        self.__edges__.remove(edge)
        for trait, trait_dict in self.__traits__.items():
            if edge in trait_dict:
                del trait_dict[edge]

    def is_directed(self):
        return True

    def has_edge(self, source, target):
        """True iff the directed edge source -> target exists."""
        return (source, target) in self.__edges__

    # Caution: returns the internal set itself, not a copy; mutating it
    # will corrupt the graph's bookkeeping.
    def out_neighbors(self, node):
        return self.__out_neighbor_sets__[node]

    # Caution: returns the internal set itself, not a copy; mutating it
    # will corrupt the graph's bookkeeping.
    def in_neighbors(self, node):
        return self.__in_neighbor_sets__[node]
コード例 #22
0
class GraphData:
    """Undirected graph with per-node/per-edge traits and one savepoint.

    Edges are normalized to (min(u, v), max(u, v)) so each undirected edge
    has a single canonical representation.  add_trait() registers a named
    attribute dictionary for nodes/edges.  set_savepoint() starts an undo
    log that restore_to_savepoint() replays in reverse to roll the graph
    back; clear_savepoint() discards it.
    """

    def __init__(self):
        self.__nodes__ = SampleSet()
        self.__edges__ = SampleSet()
        self.__traits__ = {}  # trait name -> SavepointDictWrapper
        self.__savepoint_set__ = False
        self.__savepoint_name__ = None
        self.__undo_log__ = []

        self.__neighbor_sets__ = {}  # node -> set of adjacent nodes
        # UndirectedDict is defined below in this file.
        self.__trait_dict_type__ = UndirectedDict

    # Tags identifying entries in the undo log.
    __ADD_TRAIT__ = 0
    __ADD_NODE__ = 1
    __ADD_EDGE__ = 2
    __DEL_NODE__ = 3
    __DEL_EDGE__ = 4

    @classmethod
    def FromNetworkX(cls, nx_graph):
        """Build a cls() copy of a networkx graph, importing node and
        edge attributes as traits."""
        # BUG FIX: the version test was a lexicographic string compare
        # ('10.0' < '2.0' is True); compare the major version numerically.
        v2 = int(nx.__version__.split('.')[0]) >= 2

        G = cls()
        for node in nx_graph.nodes():
            G.add_node(node)
            # networkx 2.x renamed the per-node attribute accessor.
            if v2:
                d = nx_graph.nodes[node]
            else:
                d = nx_graph.node[node]
            for attribute, value in d.items():
                if attribute not in G.__traits__:
                    G.add_trait(attribute)
                G[attribute][node] = value

        for (a, b) in nx_graph.edges():
            G.add_edge(a, b)
            if v2:
                d = nx_graph.edges[(a, b)]
            else:
                d = nx_graph.edge[(a, b)]
            for attribute, value in d.items():
                if attribute not in G.__traits__:
                    G.add_trait(attribute)
                G[attribute][(a, b)] = value

        return G

    def add_trait(self, name):
        """Register a named trait dictionary; no-op if already present."""
        if name not in self.__traits__:
            self.__traits__[name] = \
                SavepointDictWrapper(self.__trait_dict_type__())
            if self.__savepoint_set__:
                self.__undo_log__.append((GraphData.__ADD_TRAIT__, name))

    def add_node(self, node):
        """Add `node` with an empty neighbor set; no-op if present."""
        if node in self.__nodes__:
            return

        if self.__savepoint_set__:
            self.__undo_log__.append((GraphData.__ADD_NODE__, node))

        self.__nodes__.add(node)
        self.__neighbor_sets__[node] = set()

    def delete_node(self, node):
        """Remove `node`, its incident edges, and its trait entries;
        no-op if the node does not exist."""
        if node not in self.__nodes__:
            return

        if self.__savepoint_set__:
            # Record the incident edges so a restore can re-add them.
            self.__undo_log__.append((GraphData.__DEL_NODE__, node,
                [(node, n) for n in self.__neighbor_sets__[node]]))

        for neighbor in self.__neighbor_sets__[node]:
            self.__neighbor_sets__[neighbor].remove(node)
            self.__edges__.remove((min(node, neighbor), max(node, neighbor)))
        del self.__neighbor_sets__[node]
        self.__nodes__.remove(node)
        for trait, trait_dict in self.__traits__.items():
            if node in trait_dict:
                del trait_dict[node]

    def add_edge(self, source, target):
        """Add the undirected edge {source, target}; no-op if present.
        Both endpoints must already be nodes."""
        source_ = min(source, target)
        target_ = max(source, target)

        if (source_, target_) in self.__edges__:
            return

        if self.__savepoint_set__:
            self.__undo_log__.append(
                (GraphData.__ADD_EDGE__, (source_, target_)))

        self.__edges__.add((source_, target_))
        self.__neighbor_sets__[source].add(target)
        self.__neighbor_sets__[target].add(source)

    def delete_edge(self, source, target):
        """Remove the undirected edge {source, target} and its trait
        entries; no-op if the edge does not exist."""
        source_ = min(source, target)
        target_ = max(source, target)
        edge = (source_, target_)

        # BUG FIX: the neighbor sets were mutated BEFORE the membership
        # check, so deleting a nonexistent edge raised KeyError (or left
        # the adjacency inconsistent with __edges__).  Check first, as
        # DirectedGraphData.delete_edge already does.
        if edge not in self.__edges__:
            return

        if self.__savepoint_set__:
            self.__undo_log__.append((GraphData.__DEL_EDGE__, edge))

        self.__neighbor_sets__[source].remove(target)
        self.__neighbor_sets__[target].remove(source)
        self.__edges__.remove(edge)
        for trait, trait_dict in self.__traits__.items():
            if edge in trait_dict:
                del trait_dict[edge]

    def nodes(self):
        """Return the node collection."""
        return self.__nodes__

    def edges(self):
        """Return the collection of normalized edge tuples."""
        return self.__edges__

    def num_nodes(self):
        return len(self.__nodes__)

    def num_edges(self):
        return len(self.__edges__)

    def is_directed(self):
        return False

    def has_node(self, node):
        return node in self.__nodes__

    def has_edge(self, source, target):
        """True iff the undirected edge {source, target} exists."""
        return (min(source, target), max(source, target)) in self.__edges__

    def random_node(self):
        return self.__nodes__.randomly_sample()

    def random_edge(self):
        return self.__edges__.randomly_sample()

    # Caution: returns the internal adjacency set itself, not a copy;
    # mutating it will corrupt the graph's bookkeeping.
    def neighbors(self, node):
        return self.__neighbor_sets__[node]

    def __getitem__(self, key):
        """Return the trait dictionary registered under `key`."""
        if key not in self.__traits__:
            raise ValueError("Error! Trait %s not found in graph. " % key + \
                "Use add_trait(%s) first." % key)
        return self.__traits__[key]

    def __setitem__(self, key, value):
        raise ValueError("Error! Traits must be set via add_trait().")

    def copy(self):
        """Return a structural copy (nodes, edges, and trait entries)."""
        if self.is_directed():
            c = DirectedGraphData()
        else:
            c = GraphData()
        for node in self.__nodes__:
            c.add_node(node)
        for (a, b) in self.__edges__:
            c.add_edge(a, b)
        for trait_name, trait_dict in self.__traits__.items():
            c.add_trait(trait_name)
            for element, trait_value in trait_dict.items():
                c[trait_name][element] = trait_value
        return c

    # `name` is for debugging purposes only.
    def set_savepoint(self, name=None):
        """Start recording changes; raises if a savepoint is already set."""
        if self.__savepoint_set__:
            if self.__savepoint_name__ is not None:
                old_name_str = " (name of previous savepoint: %s)" % \
                    self.__savepoint_name__
            else:
                old_name_str = ""
            raise ValueError("Error! Setting a graph_data savepoint when " + \
                "one is already set!" + old_name_str)
        self.__savepoint_name__ = name
        self.__savepoint_set__ = True
        self.__undo_log__ = []
        for _, trait_dict in self.__traits__.items():
            trait_dict.set_savepoint()

    def restore_to_savepoint(self):
        """Roll the graph back to the state at the last set_savepoint().
        The savepoint remains set afterwards."""
        # Temporarily prevent these 'changes' from being put in the log.
        self.__savepoint_set__ = False
        for _, trait_dict in self.__traits__.items():
            trait_dict.restore_to_savepoint()
            # Don't bother recording the 'changes' made below.
            trait_dict.clear_savepoint()

        # Replay the log newest-first so each undo sees the state it expects.
        for undo_data in reversed(self.__undo_log__):
            undo_type = undo_data[0]
            if undo_type == GraphData.__ADD_TRAIT__:
                del self.__traits__[undo_data[1]]
            elif undo_type == GraphData.__ADD_NODE__:
                self.delete_node(undo_data[1])
            elif undo_type == GraphData.__ADD_EDGE__:
                self.delete_edge(undo_data[1][0], undo_data[1][1])
            elif undo_type == GraphData.__DEL_NODE__:
                self.add_node(undo_data[1])
                for edge in undo_data[2]:
                    self.add_edge(edge[0], edge[1])
            else:  # GraphData.__DEL_EDGE__
                self.add_edge(undo_data[1][0], undo_data[1][1])

        # BUG FIX: discard the replayed entries; the original kept them,
        # so a second restore_to_savepoint() would replay stale undos.
        self.__undo_log__ = []

        # Restore the savepoint_set status.
        self.__savepoint_set__ = True
        for _, trait_dict in self.__traits__.items():
            trait_dict.set_savepoint()

    def clear_savepoint(self):
        """Drop the savepoint and undo log without changing the graph."""
        self.__savepoint_set__ = False
        self.__undo_log__ = []
        self.__savepoint_name__ = None
        for _, trait_dict in self.__traits__.items():
            trait_dict.clear_savepoint()
コード例 #23
0
    "CRP",
    "hnf1",
    "reb1",
    "nfkb",
    "pdr3"
]

# Motif sets held out for evaluating the fitted model.
TESTING_SETS = ["E2F200", "mcb", "creb"]


if __name__ == '__main__':
    # Fit an ordinary least-squares model on the training motif sets and
    # report variance (R^2) scores on both splits.
    model = linear_model.LinearRegression()

    samples = SampleSet(TRAINING_SETS, TESTING_SETS)

    model.fit(samples.training_x_samples, samples.training_y_samples)

    print("Coefficients: ", model.coef_)

    print(f"Training set variance score: {model.score(samples.training_x_samples, samples.training_y_samples)}")
    print(f"Testing set variance score: {model.score(samples.testing_x_samples, samples.testing_y_samples)}")

    # BUG FIX: the file handle was never closed, and coefficients were
    # written back-to-back with no separator, producing an unparseable
    # blob.  Use a context manager and write one coefficient per line.
    with open("coef.txt", "w") as result_file:
        for coefficient in model.coef_:
            result_file.write("{:.10e}\n".format(coefficient))