def setUp(self):
    """Create the shared fixture: a TimeSeriesGroup holding two series.

    Series 'x' uses a fixed list of eight values; series 'y' holds each of
    those values doubled. Both share the integer time points 0..7.
    """
    x_values = [0.5, 2, 4, 5.5, 6.5, 7, 7.25, 7.3]
    y_values = [value * 2 for value in x_values]
    time_points = list(range(len(x_values)))

    series_x = TimeSeries(values=x_values, time=time_points, feature='x')
    series_y = TimeSeries(values=y_values, time=time_points, feature='y')
    self.tsg = TimeSeriesGroup([series_x, series_y])
def cluster(self):
    """Fit ``self.agg_clust`` on the group's data and split the rows by label.

    Returns:
        A ``Monitor`` (created with ``dire=self.dire``) that maps each
        cluster label to a ``TimeSeriesGroup`` built from the rows that
        received that label.
    """
    self.agg_clust.fit(self.tsg.as_df())
    cluster_labels = self.agg_clust.labels_

    # Take a second frame from as_df() to carry the temporary label column.
    labelled = self.tsg.as_df()
    labelled['cluster'] = cluster_labels

    monitor = Monitor(dire=self.dire)
    for cluster_id, members in labelled.groupby(by='cluster'):
        # The label column is bookkeeping only; strip it before wrapping.
        monitor[cluster_id] = TimeSeriesGroup(members.drop('cluster', axis=1))
    return monitor
def do_kscan(self):
    """Run agglomerative clustering once for every cluster count in ``self.ks``.

    Each run uses a fresh ``AgglomerativeClustering(n_clusters=k,
    **self.kwargs)`` fitted on ``self.tsg.as_df()``.

    Returns:
        dict mapping each k to a ``Monitor`` (created with
        ``dire=self.dire``) whose entries map cluster label to the
        ``TimeSeriesGroup`` of rows assigned that label.
    """
    results_by_k = {}
    for n_clusters in self.ks:
        model = AgglomerativeClustering(n_clusters=n_clusters, **self.kwargs)
        model.fit(self.tsg.as_df())
        cluster_labels = model.labels_

        # Take a second frame from as_df() to carry the temporary label column.
        labelled = self.tsg.as_df()
        labelled['cluster'] = cluster_labels

        monitor = Monitor(dire=self.dire)
        for cluster_id, members in labelled.groupby(by='cluster'):
            # The label column is bookkeeping only; strip it before wrapping.
            monitor[cluster_id] = TimeSeriesGroup(
                members.drop('cluster', axis=1)
            )
        results_by_k[n_clusters] = monitor
    return results_by_k
def __init__(self, tsg, nclust=1, db_file=None, dtw_dist=euclidean, fast=False, radius=1):
    """Initialise the clustering state with one cluster per feature of ``tsg``.

    Args:
        tsg: the group of time series to cluster. It must expose a
            ``features`` iterable and support ``tsg[feature]`` lookup.
        nclust: number of clusters; must be an ``int``.
        db_file: path of the database file. When ``None`` it defaults to
            ``<name of this module>.db`` in the current working directory.
        dtw_dist: stored as ``self.dtw_dist``; its use is not visible in
            this method (presumably the point-wise DTW distance - confirm).
        fast: stored as ``self.fast``; its use is not visible here.
        radius: stored as ``self.radius``; its use is not visible here.

    Raises:
        ValueError: if ``nclust`` is not an ``int``.
    """
    # Fail fast: reject a bad argument before any state is built, rather
    # than after the per-feature clusters have already been constructed.
    if not isinstance(nclust, int):
        raise ValueError("nclust should be of type int")

    self.nclust = nclust
    self.dtw_dist = dtw_dist
    self.fast = fast
    self.radius = radius
    self.tsg = tsg

    # Every feature starts out in its own single-member cluster.
    self.clusters = {
        feature: TimeSeriesGroup(self.tsg[feature])
        for feature in self.tsg.features
    }
    self.evolution_of_clusters = {}

    if db_file is None:
        # splitext removes whatever extension __file__ carries, instead of
        # assuming it is exactly the three characters '.py'.
        module_name = os.path.splitext(os.path.basename(__file__))[0]
        db_file = os.path.join(os.getcwd(), module_name + '.db')
    self.db_file = db_file
def setUp(self):
    """Create the shared fixtures: a two-series group and seven sample edges.

    The group holds series 'x' (eight fixed values) and series 'y' (the
    same values doubled) over the integer time points 0..7. The edges
    connect nodes A-D and cover single, list-valued and ``None`` sources
    and targets, with ids 1 to 7.
    """
    x_values = [0.5, 2, 4, 5.5, 6.5, 7, 7.25, 7.3]
    y_values = [value * 2 for value in x_values]
    time_points = list(range(len(x_values)))

    series_x = TimeSeries(values=x_values, time=time_points, feature='x')
    series_y = TimeSeries(values=y_values, time=time_points, feature='y')
    self.tsg = TimeSeriesGroup([series_x, series_y])

    node_a, node_b, node_c, node_d = (Node(name) for name in 'ABCD')

    # One edge per source/target combination; ids follow creation order.
    self.first_order_deg = Edge(source=node_a, target=None, id=1)
    self.zero_order_prod = Edge(source=None, target=node_b, id=2)
    self.first_order_transition = Edge(source=node_a, target=node_b, id=3)
    self.complex_break = Edge(source=node_a, target=[node_b, node_c], id=4)
    self.complex_form = Edge(source=[node_a, node_b], target=node_c, id=5)
    self.second_order_transition = Edge(
        source=[node_a, node_b], target=[node_c, node_d], id=6
    )
    self.second_order_deg = Edge(source=[node_a, node_b], target=None, id=7)
def result(self):
    """Return the members of ``self.tsg`` named by the entries of ``self.dtw``.

    For every entry in ``self.dtw`` the feature of its ``y`` attribute is
    collected; those labels select rows via ``self.tsg.loc`` and the
    selection is wrapped in a new ``TimeSeriesGroup``.
    """
    matched_features = [entry.y.feature for entry in self.dtw]
    selection = self.tsg.loc[matched_features]
    return TimeSeriesGroup(selection)
def get_tsg(go_id, tsg):
    """Build a TimeSeriesGroup from the rows that ``get_dups`` returns.

    Args:
        go_id: passed through to ``get_dups``.
        tsg: passed through to ``get_dups``; not otherwise used here.

    Returns:
        A ``TimeSeriesGroup`` built from ``data.loc[...]`` for the labels
        returned by ``get_dups(go_id, tsg)``.
    """
    # NOTE(review): rows are taken from the module-level ``data`` frame, not
    # from the ``tsg`` argument - confirm this is intended.
    selected_labels = get_dups(go_id, tsg)
    selected_rows = data.loc[selected_labels]
    return TimeSeriesGroup(selected_rows)
# Register the directory that contains the pytseries checkout; this must
# run before the pytseries imports below.
site.addsitedir(r'/home/b3053674/Documents')

from pytseries.core import TimeSeries, TimeSeriesGroup
from pytseries.clust import TimeSeriesKMeans

dire = r'/home/b3053674/Documents/pytseries/Microarray2/GOclustering'
microarray_data_file = r'/home/b3053674/Documents/pytseries/Microarray2/MeanMicroarrayDEGS_gt_75.csv'

# Load the microarray table, turn the column labels into ints and drop
# rows that are entirely NaN.
data = pandas.read_csv(microarray_data_file, index_col=[0])
data.columns = [int(column) for column in data.columns]
data = data.dropna(how='all')

# Wrap the table, then apply interpolate(num=30) followed by norm().
tsg = TimeSeriesGroup(data).interpolate(num=30).norm()

# Read every GO*.csv in `dire`, keyed by file name without the '.csv' suffix.
pattern = os.path.join(dire, 'GO*.csv')
files = glob.glob(pattern)
df_dct = {}
for csv_file in files:
    go_term = os.path.basename(csv_file)[:-4]
    df_dct[go_term] = pandas.read_csv(csv_file, index_col=[0])

# Concatenating a dict uses its keys (the GO terms) as the outer index level.
go_df = pandas.concat(df_dct)
# Assumes each GO csv has exactly one data column - TODO confirm.
go_df.columns = ['genes']