Example #1
0
 def setUp(self):
     """Create the shared fixture: a TimeSeriesGroup with two series.

     Series 'x' holds a fixed list of values and series 'y' holds each of
     those values doubled; both use the same integer time axis
     0..len(values)-1. The group is stored on ``self.tsg``.
     """
     x_values = [0.5, 2, 4, 5.5, 6.5, 7, 7.25, 7.3]
     y_values = [value * 2 for value in x_values]
     time_points = list(range(len(x_values)))

     series = [
         TimeSeries(values=x_values, time=time_points, feature='x'),
         TimeSeries(values=y_values, time=time_points, feature='y'),
     ]
     self.tsg = TimeSeriesGroup(series)
Example #2
0
 def cluster(self):
     """Fit ``self.agg_clust`` and collect each cluster in a Monitor.

     The model is fitted on the data frame form of ``self.tsg``. Its
     ``labels_`` are attached to a second data frame of the group as a
     temporary 'cluster' column, the frame is split on that column, and
     each part (with the column removed again) is wrapped in a
     TimeSeriesGroup and stored in the Monitor under its cluster label.

     Returns:
         The Monitor created with ``dire=self.dire`` holding one
         TimeSeriesGroup per cluster label.
     """
     self.agg_clust.fit(self.tsg.as_df())
     cluster_labels = self.agg_clust.labels_

     # A second frame is requested so the label column is added to it
     # rather than to the frame that was handed to ``fit``.
     labelled = self.tsg.as_df()
     labelled['cluster'] = cluster_labels

     monitor = Monitor(dire=self.dire)
     for cluster_id, members in labelled.groupby(by='cluster'):
         monitor[cluster_id] = TimeSeriesGroup(members.drop('cluster', axis=1))
     return monitor
Example #3
0
 def do_kscan(self):
     """Run one clustering per value in ``self.ks`` and collect the results.

     For every ``k`` a new ``AgglomerativeClustering(n_clusters=k,
     **self.kwargs)`` is fitted on the data frame form of ``self.tsg``.
     The resulting labels are used to split the frame into clusters, and
     each cluster is stored as a TimeSeriesGroup in a Monitor keyed by its
     label.

     Returns:
         dict mapping each ``k`` from ``self.ks`` to the Monitor built for
         that number of clusters.
     """
     scan_results = {}
     for n_clusters in self.ks:
         model = AgglomerativeClustering(n_clusters=n_clusters, **self.kwargs)
         model.fit(self.tsg.as_df())
         cluster_labels = model.labels_

         # Temporary 'cluster' column: used only to split the frame and
         # removed again before each part is wrapped.
         labelled = self.tsg.as_df()
         labelled['cluster'] = cluster_labels

         monitor = Monitor(dire=self.dire)
         for cluster_id, members in labelled.groupby(by='cluster'):
             monitor[cluster_id] = TimeSeriesGroup(
                 members.drop('cluster', axis=1))
         scan_results[n_clusters] = monitor
     return scan_results
Example #4
0
    def __init__(self, tsg, nclust=1, db_file=None,
                 dtw_dist=euclidean, fast=False, radius=1):
        """Store the clustering settings and seed one cluster per feature.

        Args:
            tsg: The group to cluster. It must expose ``features`` and
                support ``tsg[feature]`` indexing.
            nclust: Target number of clusters; must be an ``int``.
            db_file: Path of the database file. When ``None`` it defaults
                to ``<current working directory>/<this module's name>.db``.
            dtw_dist: Stored on the instance as ``self.dtw_dist``.
                Presumably the point-wise distance used for DTW; confirm
                against the code that reads it.
            fast: Stored on the instance as ``self.fast``.
            radius: Stored on the instance as ``self.radius``.

        Raises:
            ValueError: If ``nclust`` is not an ``int``.
        """
        # Validate first so a bad argument fails before any clusters are
        # built.
        if not isinstance(nclust, int):
            raise ValueError("nclust should be of type int")

        self.nclust = nclust
        self.dtw_dist = dtw_dist
        self.fast = fast
        self.radius = radius

        self.tsg = tsg
        # Start with every feature in a singleton cluster of its own.
        self.clusters = {
            feature: TimeSeriesGroup(self.tsg[feature])
            for feature in self.tsg.features
        }
        self.evolution_of_clusters = {}

        if db_file is None:
            # splitext removes the extension whatever its length; slicing
            # off three characters only worked for ".py".
            module_name = os.path.splitext(os.path.basename(__file__))[0]
            db_file = os.path.join(os.getcwd(), module_name + '.db')
        self.db_file = db_file
Example #5
0
    def setUp(self):
        """Create the shared fixtures: a two-series group and sample edges.

        ``self.tsg`` holds series 'x' (a fixed list of values) and 'y'
        (the same values doubled) on a common integer time axis. Seven
        Edge objects are then built over nodes A-D, covering each
        combination of ``None``, single-node and node-list values for
        ``source`` and ``target`` that the attribute names describe.
        """
        x_values = [0.5, 2, 4, 5.5, 6.5, 7, 7.25, 7.3]
        y_values = [value * 2 for value in x_values]
        time_points = list(range(len(x_values)))
        self.tsg = TimeSeriesGroup([
            TimeSeries(values=x_values, time=time_points, feature='x'),
            TimeSeries(values=y_values, time=time_points, feature='y'),
        ])

        # Nodes are created in the order A, B, C, D.
        a, b, c, d = (Node(name) for name in 'ABCD')

        # No target.
        self.first_order_deg = Edge(source=a, target=None, id=1)
        # No source.
        self.zero_order_prod = Edge(source=None, target=b, id=2)
        # One source, one target.
        self.first_order_transition = Edge(source=a, target=b, id=3)
        # One source, two targets.
        self.complex_break = Edge(source=a, target=[b, c], id=4)
        # Two sources, one target.
        self.complex_form = Edge(source=[a, b], target=c, id=5)
        # Two sources, two targets.
        self.second_order_transition = Edge(
            source=[a, b], target=[c, d], id=6)
        # Two sources, no target.
        self.second_order_deg = Edge(source=[a, b], target=None, id=7)
Example #6
0
 def result(self):
     """Return the rows of ``self.tsg`` named by the entries of ``self.dtw``.

     The ``y.feature`` of every item in ``self.dtw`` is collected, in
     iteration order, and used to select from ``self.tsg.loc``; the
     selection is wrapped in a new TimeSeriesGroup.
     """
     matched_features = [alignment.y.feature for alignment in self.dtw]
     selection = self.tsg.loc[matched_features]
     return TimeSeriesGroup(selection)
Example #7
0
def get_tsg(go_id, tsg):
    """Build a TimeSeriesGroup from the rows ``get_dups`` returns for a GO id.

    ``go_id`` and ``tsg`` are passed straight to ``get_dups``; the labels
    it returns are used to select rows from the module-level ``data``.

    NOTE(review): the rows come from the global ``data``, not from the
    ``tsg`` argument. Confirm this is intended.
    """
    duplicated = get_dups(go_id, tsg)
    selection = data.loc[duplicated]
    return TimeSeriesGroup(selection)
Example #8
0
site.addsitedir(r'/home/b3053674/Documents')
from pytseries.core import TimeSeries, TimeSeriesGroup
from pytseries.clust import TimeSeriesKMeans




# Directory searched below for per-GO-term CSV files (names matching GO*.csv).
dire = r'/home/b3053674/Documents/pytseries/Microarray2/GOclustering'

# CSV holding the microarray data; its first column becomes the row index.
microarray_data_file = r'/home/b3053674/Documents/pytseries/Microarray2/MeanMicroarrayDEGS_gt_75.csv'

data = pandas.read_csv(microarray_data_file, index_col=[0])
# Column labels are read as strings; convert them to integers
# (presumably time points -- confirm against the file).
data.columns = list(map(int, data.columns))
# Discard rows that contain no values at all.
data = data.dropna(how='all')

# Wrap the table, interpolate with num=30, then normalise.
tsg = TimeSeriesGroup(data).interpolate(num=30).norm()


pattern = os.path.join(dire, 'GO*.csv')
files = glob.glob(pattern)

# One data frame per GO file, keyed by the file name without its
# four-character ".csv" suffix.
df_dct = {}
for i in files:
    go_term = os.path.basename(i)[:-4]
    df_dct[go_term] = pandas.read_csv(i, index_col=[0])

# Concatenating a dict uses its keys as the outer level of the result's index.
go_df = pandas.concat(df_dct)
go_df.columns = ['genes']