def getData(Config, fields):
    """Print a tab-separated table of stored samples for every known host.

    The host list is read with ``Stats(Config).sget("stats.hostlist")`` and the
    samples for each host with ``TimeSeries(Config).zget(host)``.

    Args:
        Config: configuration object handed to ``TimeSeries`` and ``Stats``.
        fields: sequence of sample keys to print as columns.  ``'when'`` is
            skipped here because the timestamp is always the first column.
    """
    t = TimeSeries(Config)
    s = Stats(Config)
    hosts = s.sget("stats.hostlist")

    columns = [f for f in fields if f != 'when']
    header = "when\t\t\t\t" + "".join(f + "\t" for f in columns)

    for host in hosts:
        print("Hostname: %s" % host)
        print(header)
        for sample in t.zget(host):
            line = "%s" % asctime(localtime(sample['when']))
            line += "\t"
            for f in columns:
                try:
                    value = sample[f]
                    # The original compared type(value) to the *strings*
                    # 'int'/'float', which is never true, so floats were never
                    # rounded.  isinstance() performs the intended check.
                    if isinstance(value, float):
                        cell = "%.2f" % value
                    else:
                        cell = str(value)
                except Exception:
                    # Best-effort rendering: a missing or unprintable field
                    # becomes a dash instead of aborting the whole report.
                    cell = "-"
                line += cell + "\t"
            print(line)
        print('\n')
def getData(Config, fields):
    """Dump every host's time series as a tab-separated report on stdout.

    Hosts come from ``Stats(Config).sget("stats.hostlist")``; each host's
    samples come from ``TimeSeries(Config).zget(host)``.

    Args:
        Config: configuration object passed to ``TimeSeries`` and ``Stats``.
        fields: sequence of sample keys to show; ``'when'`` is ignored in this
            list because the formatted timestamp always leads each row.
    """
    t = TimeSeries(Config)
    s = Stats(Config)
    r = s.sget("stats.hostlist")

    shown = [name for name in fields if name != 'when']

    for i in r:
        print("Hostname: %s" % i)

        heading = "when\t\t\t\t"
        for name in shown:
            heading += name + "\t"
        print(heading)

        for y in t.zget(i):
            cells = ["%s" % asctime(localtime(y['when']))]
            for name in shown:
                try:
                    raw = y[name]
                    # type(raw) == 'float' (a string) could never match, so
                    # the two-decimal formatting was dead code; use isinstance.
                    cells.append("%.2f" % raw if isinstance(raw, float) else str(raw))
                except Exception:
                    # Missing or unprintable value: show a placeholder and
                    # keep going, as the original best-effort handler did.
                    cells.append("-")
            # Every cell, including the last one, is followed by a tab.
            print("\t".join(cells) + "\t")
        print('\n')
def test_interpolation():
    """Check ``TimeSeries.interpolate`` inside and outside the sampled range."""
    base = TS.TimeSeries([0, 5, 10], [1, 2, 3])
    probe = TS.TimeSeries([2.5, 7.5], [100, -100])

    # Simple cases: a single in-range time, then the times of another series.
    at_one = base.interpolate([1])
    assert at_one == TS.TimeSeries([1], [1.2])

    at_probe_times = base.interpolate(probe.times())
    assert at_probe_times == TS.TimeSeries([2.5, 7.5], [1.5, 2.5])

    # Boundary conditions: times outside the range yield the end values.
    out_of_range = base.interpolate([-100, 100])
    assert out_of_range == TS.TimeSeries([-100, 100], [1, 3])
def __init__(self, queue, Config, threadID):
    """Set up a responder thread named ``responder-<threadID>``.

    Stores the work queue and configuration on the instance, initialises the
    underlying ``threading.Thread`` with the generated name, and creates the
    ``TimeSeries`` handle (constructed without arguments in this variant).
    """
    self.__queue = queue
    self.Config = Config

    label = "responder-" + str(threadID)
    self.threadName = label
    threading.Thread.__init__(self, name=label)

    # myIP = socket.gethostbyname(socket.gethostname())
    self._db = TimeSeries()
def test_sub():
    """Check subtraction of series, of a scalar, and of mismatched series."""
    small = TS.TimeSeries([0, 5, 10], [1, 2, 3])
    large = TS.TimeSeries([0, 5, 10], [10, 20, 30])
    scalar = 100
    other_times = TS.TimeSeries([0, 1, 2], [1, 2, 3])

    # Series minus series with identical time points.
    difference = large - small
    assert difference == TS.TimeSeries([0, 5, 10], [9, 18, 27])

    # Series minus scalar.
    shifted = small - scalar
    assert shifted == TS.TimeSeries([0, 5, 10], [-99, -98, -97])

    # Different time points must be rejected.
    with raises(ValueError):
        small - other_times
def __init__(self, queue, Config, threadID):
    """Set up a responder thread named ``responder-<threadID>``.

    Stores the work queue and configuration, initialises the underlying
    ``threading.Thread`` with the generated name, logs the start-up at debug
    level, and opens the ``TimeSeries`` connection using the configuration.
    """
    self.__queue = queue
    self._config = Config

    label = "responder-" + str(threadID)
    self.threadName = label
    threading.Thread.__init__(self, name=label)
    logger.debug("responder started: %s" % label)

    # Time series connection used by the worker loop.
    self._db = TimeSeries(Config)
def test_add():
    """Check addition of series and scalars, including operand order."""
    small = TS.TimeSeries([0, 5, 10], [1, 2, 3])
    large = TS.TimeSeries([0, 5, 10], [10, 20, 30])
    scalar = 100
    other_times = TS.TimeSeries([0, 1, 2], [1, 2, 3])

    series_sum = small + large
    assert series_sum == TS.TimeSeries([0, 5, 10], [11, 22, 33])

    scalar_sum = small + scalar
    assert scalar_sum == TS.TimeSeries([0, 5, 10], [101, 102, 103])

    # Different time points must be rejected.
    with raises(ValueError):
        small + other_times

    # Operand order must not matter.
    assert small + large == large + small
    assert scalar + small == small + scalar

    with raises(ValueError):
        other_times + small
def test_mul():
    """Check multiplication of series and scalars, including operand order."""
    small = TS.TimeSeries([0, 5, 10], [1, 2, 3])
    large = TS.TimeSeries([0, 5, 10], [10, 20, 30])
    scalar = 100
    other_times = TS.TimeSeries([0, 1, 2], [1, 2, 3])

    series_product = small * large
    assert series_product == TS.TimeSeries([0, 5, 10], [10, 40, 90])

    scaled = small * scalar
    assert scaled == TS.TimeSeries([0, 5, 10], [100, 200, 300])

    # Different time points must be rejected.
    with raises(ValueError):
        small * other_times

    # Operand order must not matter.
    assert small * large == large * small
    assert scalar * small == small * scalar

    with raises(ValueError):
        other_times * small
def ForeCast():
    """Run a forecast on the posted data and return the result as JSON.

    Model parameters are read from the request by ``GetParameters``; the series
    comes from the ``'Data'`` entry of the request's JSON body.

    Returns:
        A ``jsonify`` response.  On success it carries ``PredictResult``,
        ``forecast``, ``aic``, ``hqic`` and an empty ``Exception``.  On failure
        it carries empty ``PredictResult``/``forecast`` and the text returned by
        ``GetException()`` under ``Exception``.
    """
    try:
        perdictcount, forcastSteps, p, q, d, s, cType = GetParameters(request)
        # JSON round-trip yields an independent copy of the payload.
        data = json.loads(json.dumps(request.json['Data']))
        PredictResult, forecast, aic, hqic = ts.main(
            data, forcastSteps, perdictcount, p, q, d, s, cType)
        result = {
            'PredictResult': PredictResult,
            'forecast': forecast,
            'aic': aic,
            'hqic': hqic,
            'Exception': ''
        }
        return jsonify(result)
    except Exception:
        # Was a bare ``except:``, which also trapped SystemExit and
        # KeyboardInterrupt; only genuine errors become an error payload.
        return jsonify({
            'PredictResult': '',
            'forecast': '',
            'Exception': GetException()
        })
def GetDecompose():
    """Decompose the posted series and return its components as JSON.

    The series comes from the ``'Data'`` entry of the request's JSON body and
    is decomposed by ``ts.Decompose``.

    Returns:
        A ``jsonify`` response.  On success it has ``trend``, ``resid`` and
        ``seasonal`` (each a mapping of formatted index label to value, with
        missing values replaced by the string ``'null'``) plus an empty
        ``Exception``.  On failure it has the same error payload shape as the
        other endpoints, with the ``GetException()`` text under ``Exception``.
    """
    try:
        # JSON round-trip yields an independent copy of the payload.
        data = json.loads(json.dumps(request.json['Data']))
        decomposition = ts.Decompose(data)

        result = {}
        for part in ("trend", "resid", "seasonal"):
            component = getattr(decomposition, part)
            result[part] = dict(
                zip(component.index.format(), component.fillna('null')))
        result['Exception'] = ''
        return jsonify(result)
    except Exception:
        # Was a bare ``except:``, which also trapped SystemExit and
        # KeyboardInterrupt.
        # NOTE(review): the PredictResult/forecast keys look copied from the
        # forecast endpoint; kept because clients may rely on them.
        return jsonify({
            'PredictResult': '',
            'forecast': '',
            'Exception': GetException()
        })
def testUpload(self):
    """Prompt for a file path, load it into ``self.ts``, then show the menu.

    If the prompt or the load fails, "unable to load File" is printed.  The
    main menu is shown afterwards in either case.
    """
    try:
        x = input("enter File Path ")
        self.ts = TimeSeries.TimeSeries(x)
    except Exception:
        # Only the load step is guarded.  Previously a bare ``except:`` also
        # wrapped ``self.mainMenu()``, so any error raised anywhere in the menu
        # was reported as a load failure and the menu was entered again; it
        # also swallowed Ctrl-C at the prompt.
        print("unable to load File")
    self.mainMenu()
def _get_ts_df():
    """Return the DataFrame of a ``TimeSeries`` filled with generated month data.

    The generated rows are loaded under the column names ``TIME`` and ``VALUE``.
    """
    generator = TSG.TimeSeriesGenerator()
    month_rows = generator.generate_month_data()

    series = TS.TimeSeries()
    series.set_ts_list(month_rows, ["TIME", "VALUE"])
    return series.get_ts_df()
def GetPlots():
    """Produce the ACF plot for the posted series and return its file name.

    The series comes from the ``'Data'`` entry of the request's JSON body and
    is passed to ``ts.PlotAcf``.

    Returns:
        A ``jsonify`` response with ``fileName`` on success, or the shared
        error payload (``GetException()`` text under ``Exception``) on failure.
    """
    try:
        # JSON round-trip yields an independent copy of the payload.
        data = json.loads(json.dumps(request.json['Data']))
        fileName = ts.PlotAcf(data)
        return jsonify({"fileName": fileName})
    except Exception:
        # Was a bare ``except:``, which also trapped SystemExit and
        # KeyboardInterrupt.
        # NOTE(review): the PredictResult/forecast keys look copied from the
        # forecast endpoint; kept because clients may rely on them.
        return jsonify({
            'PredictResult': '',
            'forecast': '',
            'Exception': GetException()
        })
def main():
    """Load the sample CSV, wrap it in a ``TimeSeries`` and print ten rows."""
    # The test data file has no header row, so the columns are named here.
    source_path = '../data/o_10631'
    frame = pd.read_csv(source_path, header=None)
    frame.columns = ['timestamp', 'value', 'host_id']

    series = TimeSeries.TimeSeries(frame.timestamp, frame.value)
    as_frame = series.getDataframe()
    print(as_frame.head(10))
def checkdatastationarity(data):
    """Return a ``{column: bool}`` stationarity verdict for every data column.

    Every key of ``data`` except ``'Dates'`` is treated as a column.  Each
    column is cast to ``float32`` and passed to ``test_stationarity``.  The
    column is marked ``False`` when element 0 of that result is greater than
    element 7, ``True`` otherwise.
    NOTE(review): presumably element 0 is the test statistic and element 7 a
    critical value; confirm against ``test_stationarity``.
    """
    columns = list(data.keys())
    frame = ts.ReadData(data)
    columns.remove('Dates')

    verdicts = {}
    for column in columns:
        frame[column] = frame[column].astype('float32')
        outcome = test_stationarity(frame[column])
        verdicts[column] = not (outcome[0] > outcome[7])
    return verdicts
def _get_ts_array(time_type):
    """Return the ``TIME`` column array of generated data for ``time_type``.

    ``"d"`` selects day data, ``"w"`` week data, and ``"m"`` or ``"y"`` month
    data.  For any other value no data is loaded into the series before its
    DataFrame is read.
    """
    generator = TSG.TimeSeriesGenerator()
    series = TS.TimeSeries()

    # Pick the generator method first, then load its output once.
    if time_type == "d":
        produce = generator.generate_day_data
    elif time_type == "w":
        produce = generator.generate_week_data
    elif time_type in ("m", "y"):
        produce = generator.generate_month_data
    else:
        produce = None

    if produce is not None:
        series.set_ts_list(produce(), ["TIME", "VALUE"])

    frame = series.get_ts_df()
    return frame["TIME"].array
class Responder(threading.Thread):
    """Worker thread that drains a queue of samples into the time-series store."""

    def __init__(self, queue, Config, threadID):
        """Name the thread ``responder-<threadID>`` and open the store."""
        self.__queue = queue
        self._config = Config

        label = "responder-" + str(threadID)
        self.threadName = label
        threading.Thread.__init__(self, name=label)
        logger.debug("responder started: %s" % label)

        # Time series connection used by run().
        self._db = TimeSeries(Config)

    def run(self):
        """Store queued items until a ``None`` item is received.

        Each item is written with ``zput`` under ``item['name']`` with
        ``item['when']`` as its third argument, after which ``zexpire`` is
        called for that name to clean up old data.
        """
        while True:
            entry = self.__queue.get()
            if entry is None:
                # None marks the end of the queue.
                return

            name = entry['name']
            self._db.zput(name, entry, entry['when'])
            self._db.zexpire(name)
class Responder(threading.Thread):
    """Worker thread that drains a queue of samples into the time-series store."""

    def __init__(self, queue, Config, threadID):
        """Name the thread ``responder-<threadID>`` and create the store handle."""
        self.__queue = queue
        self.Config = Config

        label = "responder-" + str(threadID)
        self.threadName = label
        threading.Thread.__init__(self, name=label)

        # myIP = socket.gethostbyname(socket.gethostname())
        self._db = TimeSeries()

    def run(self):
        """Store queued items until a ``None`` item is received.

        Each item is written with ``zput`` under ``item['name']`` with
        ``item['when']`` as its third argument, after which ``zexpire`` is
        called for that name to clean up old data.
        """
        fetch = self.__queue.get
        while True:
            sample = fetch()
            if sample is None:
                # None marks the end of the queue.
                return
            self._db.zput(sample['name'], sample, sample['when'])
            self._db.zexpire(sample['name'])
class Responder(threading.Thread):
    """Worker thread that drains a queue of samples into the time-series store."""

    def __init__(self, queue, Config, threadID):
        """Name the thread ``responder-<threadID>`` and open the store."""
        self.__queue = queue
        self._config = Config

        thread_label = "responder-" + str(threadID)
        self.threadName = thread_label
        threading.Thread.__init__(self, name=thread_label)
        logger.debug("responder started: %s" % thread_label)

        # Time series connection used by run().
        self._db = TimeSeries(Config)

    def run(self):
        """Store queued items until a ``None`` item is received.

        Each item is written with ``zput`` under ``item['name']`` with
        ``item['when']`` as its third argument, after which ``zexpire`` is
        called for that name to clean up old data.
        """
        running = True
        while running:
            record = self.__queue.get()
            if record is not None:
                key = record['name']
                self._db.zput(key, record, record['when'])
                self._db.zexpire(key)
            else:
                # None marks the end of the queue.
                running = False
def load(self, features:[], targets:[], test_size:float, time_steps:int, show_plots:bool=False):
    """Fit the preprocessor on ``self.data`` and build the X/Y dataloaders.

    Creates a ``TimeSeries.Preprocessor`` and fits it on a copy of
    ``self.data``, visualises the retyped data, transforms another copy into
    ``X`` and ``Y``, and stores the train/val/test dataloaders for each on the
    instance.  A short summary is printed at the end.

    Args:
        features: forwarded to ``Preprocessor.fit``.
        targets: forwarded to ``Preprocessor.fit``.
        test_size: forwarded to ``Preprocessor.fit``.
        time_steps: forwarded to ``Preprocessor.fit``; reported as the lookback.
        show_plots: forwarded to ``TimeSeries.visualize``.
    """
    prep = TimeSeries.Preprocessor()
    self.preprocessor = prep
    prep.fit(
        data=self.data.copy(),
        features=features,
        targets=targets,
        test_size=test_size,
        time_steps=time_steps,
    )

    # Visualize the targets
    TimeSeries.visualize(
        data=prep.retype(self.data),
        name=self.name,
        time_len=self.time_len,
        targets=prep.targets,
        show_plots=show_plots,
    )

    X, Y = prep.transform(data=self.data.copy(), train=True)
    x_train, x_val, x_test = prep.dataloader(X)
    y_train, y_val, y_test = prep.dataloader(Y)

    self.X_train_dataloader, self.X_val_dataloader, self.X_test_dataloader = x_train, x_val, x_test
    self.Y_train_dataloader, self.Y_val_dataloader, self.Y_test_dataloader = y_train, y_val, y_test

    print('')
    print('Data prepared:')
    print(' Number of Features: {}'.format(X.shape[-1]))
    print(' Number of Outputs: {}'.format(Y.shape[-1]))
    print(' Lookback {} time steps'.format(time_steps))
    print('')
def _get_ts_df(time_type):
    """Return generated data for ``time_type`` as a DataFrame with ``VALUE`` set to 1.

    ``"d"`` selects day data, ``"w"`` week data, and ``"m"`` or ``"y"`` month
    data.  For any other value no data is loaded before the DataFrame is read.
    The ``VALUE`` column is then overwritten with ones.
    """
    generator = TSG.TimeSeriesGenerator()
    series = TS.TimeSeries()

    # Pick the generator method first, then load its output once.
    if time_type == "d":
        produce = generator.generate_day_data
    elif time_type == "w":
        produce = generator.generate_week_data
    elif time_type in ("m", "y"):
        produce = generator.generate_month_data
    else:
        produce = None

    if produce is not None:
        series.set_ts_list(produce(), ["TIME", "VALUE"])

    frame = series.get_ts_df()
    row_count = frame.shape[0]
    frame["VALUE"] = [1] * row_count
    return frame
def setUp(self):
    """Create the ``TimeSeries`` fixture stored as ``self.series``."""
    # NOTE(review): the meaning of the two leading arguments (1 and 0.01) is
    # defined by the TimeSeries constructor; confirm there.
    samples = [1, -1, 3, -2, 5]
    self.series = TimeSeries(1, 0.01, samples)
def __init__(self, paramIndex, seriesType):
    """Wrap a ``TimeSeries`` built from the two arguments; use 10 blocks."""
    series = TimeSeries.TimeSeries(paramIndex, seriesType)
    self.data = series
    self.numberOfBlocks = 10
scriptPath = os.path.realpath(os.path.dirname(sys.argv[0])) # account for where we live sys.path.append(scriptPath + '/..') sys.path.append(scriptPath + '/../lib') sys.path.append(scriptPath + '/../proto') from time import time from TimeSeries import * import pprint import random pp = pprint.PrettyPrinter(indent=4) t = TimeSeries() value = {} value['address'] = '1.2.3.4' value['speed'] = 1234 value['when'] = time() #t.zput('www.yahoo.com', value, value['when']); r = t.zget('www.google.com') pp.pprint(r) #t.zexpire('www.yahoo.com') addressData = {} priorities = {} for a in r: try:
# Demo script: builds a white-noise series plus an MA(2) and an AR(2) series
# derived from it.

# Put the parent directory of this file on sys.path so that the TimeSeries
# module next to it can be imported.
import os,sys,inspect
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(currentdir)
sys.path.insert(0,parentdir)
import TimeSeries as ts
import pandas as pd
import numpy as np
import matplotlib.pyplot
"""Create model instance"""
model = ts.ARIMA_model()
"""Create Timeseries"""
# The CSV only supplies the frame's rows; every "value" is overwritten below.
u = pd.read_csv("/Users/Petros/Desktop/shampoo.csv")
# Replace each value with a uniform draw shifted by -0.5 (zero-centred noise).
# NOTE(review): .loc[i] is label-based, so this assumes the default 0..n-1
# integer index; confirm.
for i in range(u.index.size):
    u.loc[i, "value"] = (np.random.uniform() - 0.5)
x = u.copy()
y = u.copy()
"""MA process"""
# x[i] = u[i] + 0.65*u[i-1] - 0.25*u[i-2]; the first two rows keep u's values.
for i in range(2, u.index.size):
    x.loc[i, "value"] = u.loc[i, "value"] +0.65*u.loc[i-1, "value"] - 0.25*u.loc[i-2, "value"]
"""AR process"""
# y[i] = -0.7*y[i-1] + 0.2*y[i-2] + u[i]; recursive, so order matters.
for i in range(2, u.index.size):
    y.loc[i, "value"] = -0.7*y.loc[i-1, "value"] + 0.2*y.loc[i-2, "value"] + u.loc[i, "value"]
exdf = pd.read_excel(FI_CALC_Path, 'input', header=0, index_col=0) df = pd.DataFrame() for i, s in exdf.iterrows(): if i.to_datetime().date() >= start and i.to_datetime().date() <= end: df = df.append(s) return df start = datetime.date(2004, 1, 1) end = datetime.date(2017, 2, 1) data = pdLoadBetas(start, end) shortData = data['2011-02-01':] #TIME SERIES ANALYSIS if False: d = data['beta0'] TimeSeries.test_stationarity(d, 'Original') #beta0 NonStationary diffs = d.diff()[1:len(d)] TimeSeries.test_stationarity(diffs, 'Diffs') #diffs stationary TimeSeries.ACF_analysis(diffs) logdiffs = np.log(d).diff()[1:len(d)] TimeSeries.test_stationarity(logdiffs, 'Logdiffs') TimeSeries.ACF_analysis(logdiffs) TimeSeries.ARIMA_auto_fit(d) #BIC: DIFFs - (1,1,0), LogDiffs - (0,1,1) #AIC: DIFFs - (5,1,1), LogDiffs - (4,1,1) breakpoint = '2017-02-01' mod = TimeSeries.AR_predict(d[:breakpoint]) #sm.tsa.AR(ts).fit(maxlag=1, method='cmle')
#SAVE FUTURES PRICES TO FUTURES PRICE TIME SERIES DB _FUT = dict() # LOAD DATABASE FROM FILE IF IT EXISTS if os.path.isfile(futDbFileName): dbfile = open(futDbFileName) _FUT = pickle.load(dbfile) dbfile.close() #SAVE PRICES TO HASH TABLE for code in FUTURES.keys(): if not _FUT.has_key(code): _FUT[code] = dict() for year in FUTURES[code].keys(): if not _FUT[code].has_key(year): _FUT[code][year] = dict() for month in FUTURES[code][year].keys(): if not _FUT[code][year].has_key(month): _FUT[code][year][month] = TimeSeries() _FUT[code][year][month].Update(refDate, FUTURES[code][year][month]) #SAVE HASH DB TO FILE dbfile = open(futDbFileName, 'w') pickle.dump(_FUT, dbfile) dbfile.close() #PRINT VOL SURFACE DATA TO FILE FOR TEST output = 'Code,Year,Month,Future' for j in range(len(skews)): output += ',' + str(skews[j]) #COPY OUTPUT HEADER TO NORMAL VOLS OUTPUT n_output = output for code in sorted(SMILES.keys()): for year in sorted(SMILES[code].keys()): for month in sorted(SMILES[code][year].keys()):
plt.xlim((-5, 5)) plt.ylim((-5, 5)) plt.title('Phase Portrait') chirp['Raw'] = [Lchann, Rchann] return chirp #%% SGs = nestdict() for mm, modal in enumerate(['LFP']): for pp, pt in enumerate(['905']): SGs[modal][pt] = defaultdict(dict) for cc, condit in enumerate(['OnTarget', 'OffTarget']): Data = [] Data = ts.import_BR(Ephys[pt][condit]['Filename'], snip=(0, 0)) #Data = dbo.load_BR_dict(Ephys[modal][pt][condit]['Filename'],sec_end=0) #Compute the TF representation of the above imported data F, T, SG, BANDS = Data.compute_tf() SG_Dict = dbo.gen_SG(Data.extract_dict(), overlap=False) #Fvect = dbo.calc_feats() #for iv, interval in enumerate(): [datatv, dataraw] = Data.raw_ts() SGs[modal][pt][condit]['SG'] = { chann: SG_Dict[chann]['SG'] for chann in ['Left', 'Right'] } SGs[modal][pt][condit]['Raw'] = dataraw SGs[modal][pt][condit]['TRaw'] = datatv
scriptPath = os.path.realpath(os.path.dirname(sys.argv[0])) # account for where we live sys.path.append(scriptPath + '/..') sys.path.append(scriptPath + '/../lib') from TimeSeries import * import time import logging import logging.handlers import random import pprint pp = pprint.PrettyPrinter(indent=4) logger = logging.getLogger("ogslb") redis = TimeSeries() def DNSLookup(query): """parse DNS query and produce lookup result. query: a sequence containing the DNS query as per PowerDNS manual appendix A: http://downloads.powerdns.com/documentation/html/backends-detail.html#PIPEBACKEND-PROTOCOL """ (_type, qname, qclass, qtype, _id, ip) = query logger.debug("doing a lookup") results = '' # we only deal with a few of the query types if (qtype == 'A' or qtype == 'ANY' or qtype == 'CNAME'):
def _simulate_operating_state(net, dir_name, label):
    """Simulate one network and return its labelled bus-voltage results.

    Runs ``ts.timeseries`` into ``<tempdir>/<dir_name>``, reads the voltage
    magnitude and angle spreadsheets from its ``res_bus`` sub-folder, joins
    them column-wise, normalises them and tags every row with ``label`` in a
    ``Feature`` column.
    """
    output_dir = os.path.join(tempfile.gettempdir(), dir_name)
    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs(output_dir, exist_ok=True)
    ts.timeseries(output_dir, net)

    bus_results = Path(output_dir) / "res_bus"
    df_vm = pd.read_excel(bus_results / "vm_pu.xls")
    df_va = pd.read_excel(bus_results / "va_degree.xls")

    # Merge voltage magnitude data and voltage angle data.
    state_df = pd.concat([df_vm, df_va], axis=1, ignore_index=True)
    # The return value was never used, so this presumably works in place.
    normalize(state_df)
    state_df['Feature'] = label
    return state_df


def get_dataset():
    """Simulate six operating states and return them as one dataset.

    Returns:
        A tuple ``(df_all, dataset)``: the concatenated DataFrame of all
        states (missing values filled with 0, columns ``10`` and ``0``
        dropped) and its ``.values`` array.
    """
    # All networks are built up front, in the same order as before.
    states = [
        (n.test_net(), "time_series", 'reference'),  # standard network
        (n.test_net_hl(), "time_series_hl", 'high load'),  # higher P and Q for each load + noise
        (n.test_net_ll(), "time_series_ll", 'low load'),  # smaller P and Q for each load + noise
        (n.test_net_no_gen(), "time_series_no_gen", 'no gen'),  # without the generator at bus 3
        (n.test_net_no_l(), "time_series_no_l", 'no line'),  # without the line between bus 5 and 6
        (n.test_net_no_load(), "time_series_no_load", 'no load'),  # without load 2
    ]

    # Only the reference state's folder is announced, as in the original.
    print("Results can be found in your local temp folder: {}".format(
        os.path.join(tempfile.gettempdir(), "time_series")))

    frames = [
        _simulate_operating_state(net, dir_name, label)
        for net, dir_name, label in states
    ]

    # Merge all the operating states data in one frame.
    df_all = pd.concat(frames, axis=0, ignore_index=True)
    df_all = df_all.fillna(0)  # fill none values with 0
    df_all = df_all.drop(columns=10)
    df_all = df_all.drop(columns=0)
    dataset = df_all.values
    return df_all, dataset
def main():
    """Run the PowerDNS backend loop on stdin/stdout.

    Sets up a rotating log file under ``/tmp``, loads the configuration, opens
    the ``TimeSeries`` store, then reads tab-separated lines from stdin.  The
    first line must be ``HELO\\t1``; otherwise ``FAIL`` is sent and the process
    exits with status 1.  Each later line with exactly six fields is passed to
    ``DNSLookup``; every query is answered with ``END``.  Returns on EOF.
    """
    debug = 1
    level = logging.DEBUG if debug else logging.INFO

    logFile = '/tmp/backend-%d.log' % os.getpid()
    # Configuration lives relative to the script location.
    Config = ParseConfig.parseConfig(scriptPath + '/../etc/config.xml')

    # Logger and its rotating file handler share the same level.
    logger.setLevel(level)
    handler = logging.handlers.RotatingFileHandler(
        logFile, maxBytes=25000000, backupCount=5)
    handler.setLevel(level)
    handler.setFormatter(
        logging.Formatter("%(asctime)s - %(levelname)s - %(message)s"))
    logger.addHandler(handler)
    logger.info('startup')

    awaiting_helo = True
    db = TimeSeries(Config)

    # Loop forever reading from PowerDNS.
    while True:
        rawline = sys.stdin.readline()
        if rawline == '':
            logger.debug('EOF')
            return

        line = rawline.rstrip()
        logger.debug('received from pdns:%s' % line)

        if awaiting_helo:
            # First line from PowerDNS must be the handshake.
            if line == 'HELO\t1':
                fprint('OK\togslb backend firing up')
            else:
                fprint('FAIL')
                logger.debug('HELO input not received - execution aborted')
                # As per docs: read another line before aborting.
                sys.stdin.readline()
                logger.debug('calling sys.exit()')
                sys.exit(1)
            awaiting_helo = False
            continue

        query = line.split('\t')
        if len(query) == 6:
            logger.debug('Performing DNSLookup(%s)' % repr(query))
            # Look up a hostname in the store and prioritize it.
            lookup = DNSLookup(db, query)
            if lookup != '':
                logger.debug(lookup)
                fprint(lookup)
        # Unparseable lines and finished lookups are both closed with END.
        fprint('END')
def __init__(self):
    """Create the time series and the transformer/saver helpers used later."""
    # Time series container.
    self.ts = TS.TimeSeries()

    # Transformers, constructed with their defaults.
    self.dwmy_df_t = DWMYDFT.DWMYDFTransformator()
    self.dwmy_group_t = DWMYGgroup.DWMYGroupTransformator()
    self.ts_to_index_t = TSToIndex.TSToIndexTransformator()

    # Saver/loader is created with None as its only argument.
    saver = TripletSaverAndLoader.TripletSaverAndLoader(None)
    self.tri_save_load = saver
scriptPath = os.path.realpath(os.path.dirname(sys.argv[0])) # account for where we live sys.path.append(scriptPath + '/..') sys.path.append(scriptPath + '/../lib') sys.path.append(scriptPath + '/../proto') from time import time; from TimeSeries import * import pprint import random pp = pprint.PrettyPrinter(indent=4) t = TimeSeries() value = {} value['address'] = '1.2.3.4' value['speed'] = 1234 value['when'] = time() #t.zput('www.yahoo.com', value, value['when']); r = t.zget('www.google.com') pp.pprint(r) #t.zexpire('www.yahoo.com') addressData = {} priorities = {} for a in r:
def main() :
    """Run the TRAP pipeline: read inputs, then run the three analyses.

    Steps, in order: parse ``config.txt``; load the ID-conversion and pathway
    name files; read expression data (cuffdiff output or FPKM tracking files)
    into per-gene fold-change lists; parse every KGML file found under the
    configured directory into per-pathway gene and relation tables; then run
    the one-time-point, time-series and clustering analyses, writing under
    ``TRAP_result``.

    NOTE(review): this is Python 2 code (print statements).
    """
    # Start of TRAP
    sys.stdout.write("\n###### Pathway and clustering analysis ######\n\n")
    sys.stdout.flush()
    cuffPath = "cufflinks_result"
    diffPath = "cuffdiff_result"
    resultPath = "TRAP_result"
    controlList = []
    caseList = []
    diffList = []
    timeLen = 0
    geneIDPath = ""
    pnamePath = ""
    kgmlPath = ""
    xmlPath = ""
    cuffdiff= ""
    pCut = 0.05
    DEGCut = 2.0
    clusterCut = 2.0
    timeLag = 1.0
    fcList = {} # fcList[geneID]=[fc_0, ... , fc_t]
    pVal = {} # pVal[geneID]=[p_0, ..., p_t]
    idDic = {} # idDic[keggID]=geneID
    pnameDic = {} # pnameDic[pID]=pathwayName

    # Reading configuration file
    sys.stdout.write("Reading configuration file\t......\t")
    sys.stdout.flush()
    try :
        config = open("config.txt", "r")
        # Each line is "key = value"; lines without '=' and unknown keys are skipped.
        while True :
            cl = config.readline()
            if cl=="" :
                break
            tp = cl.split("=")
            if (len(tp)<2) :
                continue
            key=tp[0].strip()
            val=tp[1].strip()
            # Keys are matched by prefix for control*/treatment*/diff*, so
            # several such lines accumulate in file order.
            if (key[:7]=="control") :
                controlList.append(val.split(','))
            elif (key[:9]=="treatment") :
                caseList.append(val.split(','))
            elif (key=="numTP") :
                timeLen=int(val)
            elif (key=="convfilePath") :
                geneIDPath = val
            elif (key=="pnamePath") :
                pnamePath = val
            elif (key=="kgmlPath") :
                kgmlPath = val
            elif (key=="cuffdiff") :
                cuffdiff = val
            elif (key=="pVal") :
                pCut = float(val)
            elif (key[:4]=="diff") :
                diffList.append(val)
            elif (key=="DEGCut") :
                DEGCut = float(val)
            elif (key=="clusterCut") :
                clusterCut = float(val)
            elif (key=="timeLag") :
                timeLag = float(val)
            else :
                continue
        idFile = open(geneIDPath, "r")
        pnameFile = open(pnamePath, "r")
        xmlPath = os.walk(kgmlPath)
        # NOTE(review): bare raise with no active exception; the resulting
        # error is caught by the generic handler below, which re-raises.
        if (cuffdiff=="no" and len(controlList)!=len(caseList)) :
            raise
    except IOError:
        # NOTE(review): execution continues after this message, so idFile /
        # pnameFile may be undefined further down.
        print "Check if the configuration file exists"
    except :
        print "Configuration file error"
        raise

    # Make sure result path exists
    try:
        os.makedirs(resultPath)
    except OSError:
        # An already-existing directory is fine; anything else is re-raised.
        if not os.path.isdir(resultPath):
            raise

    # Copy config file so we don't get confused of what params have been set
    copy("config.txt", resultPath)

    # Reading ID-conversion / pathway name file
    # Each line: column 0 is the key; column 1 holds ", "-separated names
    # before the first ';'.
    for ids in idFile.readlines() :
        tp = ids.split("\t")
        tp2 = tp[1].split(";")
        tp3 = tp2[0].split(", ")
        if tp[0] in idDic :
            for name in tp3 :
                idDic[tp[0]].append(name.strip())
        else :
            idDic[tp[0]]=[]
            for name in tp3 :
                idDic[tp[0]].append(name.strip())
    idFile.close()
    # Each line: "<prefix>:<pID>\t<name> - <rest>"; only pID -> name is kept.
    for path in pnameFile.readlines() :
        tp = path.split("\t")
        tp2 = tp[0].split(":")
        tp3 = tp[1].split(" - ")
        pnameDic[tp2[1]]=tp3[0]
    pnameFile.close()
    sys.stdout.write("Done\n")
    sys.stdout.flush()

    # Reading fpkm file
    sys.stdout.write("Reading expression files\t......\t")
    sys.stdout.flush()
    geneSum = set()
    if cuffdiff=="yes" :
        # First pass: collect every gene (column 2) with a numeric column 9.
        for j in range(timeLen) :
            pfile = open(os.path.join(diffPath, diffList[j], "gene_exp.diff"), "r")
            for l in pfile.readlines() :
                if l.startswith('#'): continue
                tp = l.split()
                if not is_number(tp[9]) : continue
                geneSum.add(tp[2])
            pfile.close()
        for gene in geneSum :
            fcList[gene]=[]
            pVal[gene]=[]
        # Second pass: one fold change (column 9) and p-value (column 12)
        # per gene and time point.
        for j in range(timeLen) :
            pfile = open(os.path.join(diffPath, diffList[j], "gene_exp.diff"), "r")
            temp = {}
            temp2 = {}
            for l in pfile.readlines() :
                if l.startswith('#'): continue
                tp = l.split()
                if not is_number(tp[9]) : continue
                # Infinite fold changes are stored as 0.
                if ( tp[9]=='inf' or tp[9] == '-inf') :
                    temp[tp[2]]=0
                else:
                    temp[tp[2]]=float(tp[9])
                temp2[tp[2]]=float(tp[12])
            # Genes missing at this time point get fold change 0 and p-value 1.
            for gene in geneSum :
                if gene in temp :
                    fcList[gene].append(temp[gene])
                    pVal[gene].append(temp2[gene])
                else :
                    fcList[gene].append(0)
                    pVal[gene].append(1)
            pfile.close()
    else :
        # First pass: collect every gene (column 4) with a numeric column 9
        # from all control and treatment samples.
        for j in range(timeLen) :
            for con in controlList[j] :
                pfile = open(os.path.join(cuffPath, con, "genes.fpkm_tracking"), "r")
                for l in pfile.readlines() :
                    tp = l.split()
                    if not is_number(tp[9]) : continue
                    geneSum.add(tp[4])
                pfile.close()
            for case in caseList[j] :
                pfile = open(os.path.join(cuffPath, case, "genes.fpkm_tracking"), "r")
                for l in pfile.readlines() :
                    tp = l.split()
                    if not is_number(tp[9]) : continue
                    geneSum.add(tp[4])
                pfile.close()
        for gene in geneSum :
            fcList[gene]=[]
        # Second pass: per time point, gather column-9 values per gene for
        # controls (temp1) and treatments (temp2).
        for j in range(timeLen) :
            temp1 = {}
            temp2 = {}
            for con in controlList[j] :
                pfile = open(os.path.join(cuffPath, con, "genes.fpkm_tracking"), "r")
                for l in pfile.readlines() :
                    tp = l.split()
                    # Skip the header row.
                    if (tp[9]=="FPKM") : continue
                    if tp[4] in temp1 :
                        temp1[tp[4]].append(float(tp[9]))
                    else :
                        temp1[tp[4]]=[float(tp[9])]
                pfile.close()
            for case in caseList[j] :
                pfile = open(os.path.join(cuffPath, case, "genes.fpkm_tracking"), "r")
                for l in pfile.readlines() :
                    tp = l.split()
                    if (tp[9]=="FPKM") : continue
                    if tp[4] in temp2 :
                        temp2[tp[4]].append(float(tp[9]))
                    else :
                        temp2[tp[4]]=[float(tp[9])]
                pfile.close()
            # Fold change is log2 of treatment median over control median,
            # each offset by 0.01; median differences below 1.0 count as 0.
            for gene in geneSum :
                med1 = 0
                med2 = 0
                if gene in temp1 and gene in temp2 :
                    med1 = median(temp1[gene])
                    med2 = median(temp2[gene])
                elif gene in temp1 :
                    med1 = median(temp1[gene])
                elif gene in temp2 :
                    med2 = median(temp2[gene])
                else :
                    med1 = med1
                    med2 = med2
                if (abs(med2-med1)<1.0) :
                    fcList[gene].append(0)
                else :
                    fcList[gene].append(math.log((med2+0.01)/(med1+0.01),2))
    sys.stdout.write("Done\n")
    sys.stdout.flush()

    # Parsing xml file to get gene and relation information
    sys.stdout.write("Reading xml files\t\t......\t")
    sys.stdout.flush()
    i=0
    ind = {}
    DEG = []
    wgene = []
    wredic = []
    empty = []
    empty2 = []
    for t in range(0, timeLen) :
        wgene.append([]) #wgene[t][i]={keggID:fc}
        DEG.append([]) #DEG[t][i]=set(keggID)
        empty.append(0)
        empty2.append(1)
    # i is the running pathway index; ind maps the file stem to that index.
    for root,dirs,files in xmlPath:
        for file in files:
            filetp = file.split(".")
            ind[filetp[0]]=i
            for j in range(0, timeLen) :
                wgene[j].append({})
                DEG[j].append(set())
            wredic.append({}) #wredic[i]={keggID:(list of [asc, length, j])}
            # NOTE(review): the file is opened under kgmlPath, not root, so
            # files in sub-directories would not be found; confirm layout.
            xmlfile = open(os.path.join(kgmlPath, file), "r")
            xmldata = xmlfile.read()
            dom = parseString(xmldata)
            xmlfile.close()
            geneSet = set()
            entrydic = {}
            # entrydic maps an entry id to the gene names it stands for;
            # group entries are expanded through their components.
            entries = dom.getElementsByTagName("entry")
            for e in entries :
                if (e.attributes.getNamedItem("type").nodeValue == 'gene') :
                    id = e.attributes.getNamedItem("id").nodeValue
                    id = str(id)
                    genes = e.attributes.getNamedItem("name").nodeValue
                    genes = str(genes)
                    genelist = genes.split()
                    entrydic[id]=[]
                    for g in genelist :
                        entrydic[id].append(g)
                        geneSet.add(g)
                elif (e.attributes.getNamedItem("type").nodeValue == 'group') :
                    id = e.attributes.getNamedItem("id").nodeValue
                    id = str(id)
                    comps = e.getElementsByTagName("component")
                    entrydic[id]=[]
                    for c in comps :
                        geneId =c.attributes.getNamedItem("id").nodeValue
                        for g in entrydic[geneId] :
                            entrydic[id].append(g)
                            geneSet.add(g)
            # Attach fold changes to each pathway gene, using the first of
            # its converted names that has expression data.
            for g in geneSet :
                if (g in idDic) :
                    nameExist = 0
                    tpName = ""
                    for name in idDic[g] :
                        if name in fcList.keys() :
                            nameExist = 1
                            tpName = name
                            break
                    if nameExist==1 :
                        for t in range(0, timeLen) :
                            foldchange = fcList[tpName][t]
                            wgene[t][i][g]=foldchange
                            # With cuffdiff a gene is a DEG only if it also
                            # passes the p-value cut.
                            if (cuffdiff=="yes" and pVal[tpName][t]<=pCut and abs(foldchange)>=DEGCut) :
                                DEG[t][i].add(g)
                            elif (cuffdiff=="no" and abs(foldchange)>=DEGCut) :
                                DEG[t][i].add(g)
                    else :
                        for t in range(0, timeLen) :
                            wgene[t][i][g]=0
                        # NOTE(review): every such gene shares the same
                        # empty/empty2 list objects.
                        fcList[idDic[g][0]]=empty
                        if (cuffdiff=="yes") :
                            pVal[idDic[g][0]]=empty2
                else :
                    for t in range(0, timeLen) :
                        wgene[t][i][g]=0
                    fcList[g]=empty
                    if (cuffdiff=="yes") :
                        pVal[g]=empty2
            redic = wredic[i]
            relations = dom.getElementsByTagName("relation")
            for r in relations :
                subs = r.getElementsByTagName("subtype")
                ent1 = r.attributes.getNamedItem("entry1").nodeValue
                ent2 = r.attributes.getNamedItem("entry2").nodeValue
                if (not (subs==[])) :
                    for s in subs :
                        type = s.attributes.getNamedItem("name").nodeValue
                        # j is the sign of the relation: +1 activating,
                        # -1 inhibiting, 0 ignored.
                        if (type=="activation" or type=="expression") :
                            j=1
                        elif (type=="inhibition" or type=="repression") :
                            j=-1
                        else :
                            j=0
                        if (j!=0 and (ent1!=ent2) and (ent1 in entrydic) and (ent2 in entrydic)) :
                            # Record, per target gene, each source gene with
                            # the size of the target entry and the sign.
                            for desc in entrydic[ent2] :
                                length = len(entrydic[ent2])
                                for asc in entrydic[ent1] :
                                    if (desc in redic) :
                                        redic[desc].append([asc, length, j])
                                    else :
                                        redic[desc]=[[asc, length, j]]
            i=i+1
    fileN = i
    sys.stdout.write("Done\n")
    sys.stdout.flush()

    # 1. One time point SPIA analysis
    sys.stdout.write("One time point SPIA analysis\n")
    sys.stdout.flush()
    for t in range(0, timeLen) :
        sys.stdout.write("\t"+str(t+1)+"th time point\t......\t")
        sys.stdout.flush()
        OT.pathwayAnalysis(os.path.join(resultPath, "OneTime_"+str(t+1)), fileN, wgene[t], wredic, DEG[t], DEGCut, idDic, pnameDic, ind)
        sys.stdout.write("Done\n")
        sys.stdout.flush()

    # 2. Time-series SPIA analysis
    sys.stdout.write("Time-series SPIA analysis\t......\t")
    sys.stdout.flush()
    TS.pathwayAnalysis(os.path.join(resultPath, "TimeSeries"), wgene, wredic, DEG, idDic, pnameDic, timeLag, timeLen, ind, fcList)
    sys.stdout.write("Done\n")
    sys.stdout.flush()

    # 3. Clustering analysis
    sys.stdout.write("Clustering Analysis\t\t......\t")
    sys.stdout.flush()
    CL.clusteringAnalysis(os.path.join(resultPath, "Clustering"), wgene, fcList, pVal, idDic, pnameDic, clusterCut, pCut, timeLen, ind, cuffdiff)
    sys.stdout.write("Done\n")
    sys.stdout.flush()