def getAllFeatures(train, mapper):
    print("this is getAllFeatures")
    # every record has a cluster value calculated by LDA
    w2c_f, w2c_w = 10, 14
    lda_dict_1 = util.read_dict(util.features_prefix + 'id_lda_256.pkl')
    lda_dict_2 = util.read_dict(util.features_prefix + 'id_lda_512.pkl')
    k_mean_dict_1 = util.read_dict(util.features_prefix + 'c_k_all_64.pkl')
    k_mean_dict_2 = util.read_dict(util.features_prefix + 'c_k_all_128.pkl')
    sentence_dict_path = util.txt_prefix + 'id_sentences.pkl'
    word2vec_path = util.txt_prefix + str(w2c_f) + 'features_1minwords_' + \
        str(w2c_w) + 'context.pkl'
    sentence_dic = util.read_dict(sentence_dict_path)
    model = Word2Vec.load(word2vec_path)
    # `features` is assumed to be a module-level list of feature column names
    train_X = train[features]
    train_X = mapper.transform(train_X)  # .values
    new_train_X = []
    for i in range(len(train_X)):
        id = train_X[i][0]
        lda_1 = lda_dict_1[id]
        lda_2 = lda_dict_2[id]
        s = sentence_dic.get(id)
        # append the word2vec/k-means sentence features, then the two LDA cluster ids
        f = np.concatenate(
            ([train_X[i][1:].astype(np.float32)],
             [sentence_to_matrix_vec(s, model, w2c_f, k_mean_dict_1, k_mean_dict_2)]),
            axis=1)[0]
        f = np.concatenate(([f], [[lda_1, lda_2]]), axis=1)[0]
        new_train_X.append(f)
    new_train_X = np.array(new_train_X)
    return new_train_X
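# Every snippet in this section leans on util.read_dict, whose implementation
# is not shown. A minimal pickle-based sketch consistent with how it is called
# (a path plus, in some callers, an 'int' key-conversion hint) might look like
# the following. This is an assumption, not the project's actual code:
import pickle

def read_dict(path, key_type=None):
    # load a pickled dict from disk
    with open(path, 'rb') as fh:
        d = pickle.load(fh)
    # some callers pass 'int', suggesting keys may be serialized as strings
    if key_type == 'int':
        d = {int(k): v for k, v in d.items()}
    return d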
def FIcompare(folder, cells, currents=[], freqs=[],
              firing_rate_data='firing_rate_data.txt'):
    '''
    f = FIcompare(folder, cells, currents=[], freqs=[],
                  firing_rate_data='firing_rate_data.txt')
    Plot current-clamp firing traces recorded with certain input currents
    and with firing frequencies in a certain range.
    parameters:
        folder (string) - directory of the folder with raw data
        cells (array_like) - indices of neurons to plot
        currents (array_like) - list of input currents
        freqs (list) - two scalars, range of the firing rates to include
        firing_rate_data (string) - path of the firing rate data file
    return:
        f (list) - list of figure windows
    '''
    data = util.read_dict(firing_rate_data, 'int')
    f = []
    for cell in cells:
        for trial, stim, fr in zip(*data[cell][1]):
            if (len(currents) == 0 or stim in currents) and \
                    (len(freqs) == 0 or (freqs[0] <= fr and fr < freqs[1])):
                trace, sr, st = util.load_wave(folder + util.gen_name(cell, trial))
                f.append(plot.plot_trace_v(trace, sr))
                f[-1].setWindowTitle('Cell {0:d}, Trial {1:d}, I = {2:.2e}'.
                                     format(cell, trial, st[2]))
    return f
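# A hypothetical call, assuming raw traces live under 'data/' and that the
# stimulus currents are stored in amps; the 50 pA and 100 pA values and the
# 5-20 Hz window below are purely illustrative:
figs = FIcompare('data/', [1, 2, 3],
                 currents=[50e-12, 100e-12], freqs=[5, 20])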
    def __init__(self, folder, data_file, fi_file):
        '''
        Gather basic information about the data.
        parameters:
            folder (string) - directory of the raw data files
            data_file (string) - path of the data file with the cell type info
            fi_file (string) - path of the data file with the firing rate data
        '''
        self.folder = folder  # raw data folder directory
        self.data = pd.read_csv(data_file)  # cell type info
        self.fi_data = util.read_dict(fi_file, 'int')  # firing rate and stim current data
        self.trial_data = []  # chosen data to plot
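# The enclosing class is not shown in this excerpt. Assuming it is a plotting
# helper (called TrialPlotter here purely for illustration, along with the
# file paths), construction would look like:
plotter = TrialPlotter('data/raw/', 'data/cell_types.csv',
                       'data/firing_rate_data.txt')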
def read_puff_out(md_dir):
    """
    Yields a dictionary representing the properties of PushApartByVel at
    each frame of a pulsed simulation, read from the md.puff.out file in
    the specified directory.
    """
    # get time in ps; a typical MD step is 0.001 ps = 1 fs
    config = os.path.join(md_dir, 'md.puff.config')
    parms = util.read_dict(config)
    dt = 0.001 * parms['n_step_per_pulse']
    time = 0.0
    for line in open(os.path.join(md_dir, 'md.puff.out')):
        # each line holds a Python dict literal; eval assumes a trusted file
        entry = eval(line)
        entry['time'] = time
        yield entry
        time += dt
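# A hypothetical consumer of the generator above, collecting the per-pulse
# timeline. Only the 'time' key is guaranteed by the code; any other fields
# depend on what PushApartByVel writes and are not assumed here:
times = [entry['time'] for entry in read_puff_out('md_run')]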
def guess_n_frame_per_ps(basename):
    """
    Returns the n_frame_per_ps of a trajectory by reading any .config
    file that would have been generated by simulate.py.
    """
    config = basename + ".config"
    try:
        params = util.read_dict(config)
        # assuming a 1 fs time step, there are 1000 steps per ps
        n_step_per_ps = 1000
        n_step_per_snapshot = params['n_step_per_snapshot']
        n_frame_per_ps = n_step_per_ps // n_step_per_snapshot
    except Exception:
        # no readable .config file (or no key): fall back to 50 frames/ps
        n_frame_per_ps = 50
    return n_frame_per_ps
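# For instance, with a snapshot every 20 steps the function returns
# 1000 // 20 = 50 frames per ps. A quick check (file names hypothetical):
# if md_run/md.config contains {'n_step_per_snapshot': 20}, this prints 50.
print(guess_n_frame_per_ps('md_run/md'))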
def IV_curve(folder, cells, data_file, type_file='', out='IV_curve.png'):
    f = plt.figure()
    ax = f.add_subplot(111)
    data = util.read_dict(folder + data_file, 'int')
    if len(type_file):
        type_data = pd.read_csv(folder + type_file)
        groups = type_data['group']
        ncolors = len(np.unique(groups))
        cm = plt.get_cmap('gist_rainbow')
        cl = np.array([cm(i / ncolors) for i in range(ncolors)])
    for cell in cells:
        if len(type_file):
            # color each cell by its group in the type table
            color = cl[np.nonzero(np.unique(groups) ==
                                  groups[np.nonzero(type_data['No'] == cell)[0][0]])[0][0]]
        else:
            color = 'k'
        ax.plot(data[cell][0], data[cell][1], color=color)
    f.savefig(folder + out, dpi=200, transparent=True)
    return 0
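# A hypothetical invocation, plotting I-V curves for five cells colored by
# type; folder layout and file names are placeholders:
IV_curve('data/', list(range(1, 6)), 'iv_data.txt', type_file='cell_types.csv')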
def FI_slope(data_file, cells):
    '''
    slope = FI_slope(data_file, cells)
    Calculate the FI slope from averaged firing rate data.
    parameters:
        data_file (string) - path of the firing rate data file
        cells (array_like) - indices of cells to analyze
    return:
        slope (array_like) - FI curve slope of each cell
    '''
    data = util.read_dict(data_file, 'int')
    slope = []
    for cell in cells:
        stims = np.array(data[cell][0][0])
        rates = np.array(data[cell][0][1])
        # indices of points with firing rate above zero
        firing_ind = np.nonzero(rates)[0]
        # linear fit of rate vs. stimulus over the suprathreshold points
        p = np.polyfit(stims[firing_ind], rates[firing_ind], 1)
        slope.append(p[0])
    return slope
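# An illustrative call; if stimuli are stored in amps and rates in Hz, the
# returned slopes are in Hz per amp. The file name is hypothetical:
slopes = FI_slope('firing_rate_data.txt', [1, 2, 3])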
def FI_curve(data_file, type_file=None, ave=False, stims=[], cl=[],
             cells=[], sigtp=0, out='tmp.png'):
    '''
    FI_curve(data_file, type_file=None, ave=False, stims=[], cl=[],
             cells=[], sigtp=0, out='tmp.png')
    Plot FI curves; for each cell, average replicated traces recorded with
    the same current stimulation.
    parameters:
        data_file (string) - path of the file with firing rate data, refer
            to ap.firing_rate
        type_file (string) - path of the cell type csv file with cell
            indices in column No and type values (0, 1, 2, ...) in column
            group. If not provided, cell types are not differentiated.
        ave (boolean) - whether to average cells of the same type
        stims (array_like) - stimulation current steps. If not provided, use
            the steps of the first cell in the data file, assuming all the
            cells share the same steps.
        cl (array_like) - colors of the different types. If types are
            specified but colors are not, colors are generated from the
            gist_rainbow color map.
        cells (array_like) - ids of cells to analyze; an empty list (the
            default) means all the cells in the type_file.
        sigtp (float) - significance test p-value; the default of 0 means
            no test
        out (string) - path of the output figure file
    '''
    data = util.read_dict(data_file, 'int')
    type_data = pd.read_csv(type_file) if type_file is not None else None
    if not len(stims):
        stims = list(data.values())[0][0][0]
    else:
        stims = list(stims)
    ind = 0
    if len(cells):
        keys = cells
    elif type_file is not None:
        keys = type_data['No']
    else:
        keys = data.keys()
    crates = np.empty((len(keys), len(stims)))
    crates[:] = np.nan
    _cells = []
    for key in keys:
        values = data[key]
        _cells.append(key)
        '''
        _stim = values[0]
        _target = np.array([stims]).T * np.ones((1, len(_stim)))
        ind = np.nonzero(_target == _stim)[1]
        print(key)
        print(np.array(values[1])[:, ind].mean(0).reshape((1, -1)).shape)
        if 'crates' in locals():
            crates = np.vstack((crates,
                np.array(values[1])[:, ind].mean(0).reshape((1, -1))))
        else:
            crates = np.array(values[1])[:, ind].mean(0).reshape((1, -1))
        '''
        # fill in the rate measured at each requested stimulus step
        _stim = np.array(values[0][0])
        for s in stims:
            s_ind = np.nonzero(abs(_stim - s) < 1e-14)[0]
            if len(s_ind):
                crates[ind][stims.index(s)] = values[0][1][s_ind[0]]
        ind = ind + 1
    if len(cells):
        cells = np.array(cells)
        crates = crates[[_cells.index(d) for d in cells], :]
    else:
        cells = np.array(_cells)
    stims = np.array(stims) * 1e12  # A -> pA
    if type_file is not None:
        '''
        type_data = util.read_csv(type_file)
        types = type_data[np.nonzero(type_data[:, [0]] ==
            np.ones((type_data.shape[0], 1)) * cells)[1], -1]
        '''
        types = type_data.loc[np.nonzero(
            np.array(type_data['No']) == cells.reshape(-1, 1) *
            np.ones((1, len(type_data.index))))[1], 'group']
        if sigtp != 0 and len(np.unique(types)) == 2:
            # permutation test between the two types at every current step
            ps = []
            for i in range(len(stims)):
                p = util.permutationTest(*[crates[types == d, i].flatten()
                                           for d in np.unique(types)])
                ps.append(p)
            print(ps)
            # flags the steps where the difference is significant (unused)
            ps = np.array(ps) < sigtp
        print('type', types)
        print('cells', cells)
    f = plt.figure()
    ax = f.add_subplot(111)
    if ave:
        if type_file is None:
            mrates = np.nanmean(crates, 0)
            se = np.nanstd(crates, 0) / crates.shape[0]
            ax.errorbar(stims, mrates, se, ecolor='k', label='Average')
        else:
            if not len(cl):
                ncolors = len(np.unique(types))
                cm = plt.get_cmap('gist_rainbow')
                cl = [cm(i / ncolors) for i in range(ncolors)]
            for t, color in zip(np.unique(types), cl):
                print('t', t)
                print('color', color)
                _crates = crates[types == t, :]
                print('cells', cells[types == t])
                mrates = np.nanmean(_crates, 0)
                se = np.nanstd(_crates, 0) / _crates.shape[0]
                ax.errorbar(stims, mrates, se, color=color, label=t, lw=2)
    else:
        if type_file is None:
            if not len(cl):
                # no type info here, so use one color per cell
                ncolors = len(cells)
                cm = plt.get_cmap('gist_rainbow')
                cl = [cm(i / ncolors) for i in range(ncolors)]
            for crate, color, c in zip(crates, cl, cells):
                ax.plot(stims, crate, color=color, label=str(c))
        else:
            if not len(cl):
                ncolors = len(np.unique(types))
                cm = plt.get_cmap('gist_rainbow')
                cl = [cm(i / ncolors) for i in range(ncolors)]
            for i, t in enumerate(np.unique(types)):
                _crates = crates[types == t, :]
                for crate in _crates:
                    ax.plot(stims, crate, c=cl[i])  # , label=str(t))
    ax.legend(loc=2)
    ax.set_xlabel('Current (pA)')
    ax.set_ylabel('Firing rate (Hz)')
    mpl.rcParams['font.size'] = 30
    ax.spines['right'].set_visible(False)
    ax.spines['top'].set_visible(False)
    ax.yaxis.set_ticks_position('left')
    ax.xaxis.set_ticks_position('bottom')
    ax.set_xlim([stims[0], stims[-1] + 5])
    f.savefig(out, dpi=96, bbox_inches='tight', transparent=True)
    plt.close(f)
    del f
    # return f
    return 0
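# A hypothetical invocation, averaging within each cell type and testing
# between the two types at p < 0.05; file names are placeholders:
FI_curve('firing_rate_data.txt', type_file='cell_types.csv',
         ave=True, sigtp=0.05, out='FI_curve.png')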
"genion": "", "genbox": "", "vmd": "", "psfgen": "", "namd2": "", "flipdcd": "", "mod9v8": "" } home_dir = os.path.expanduser('~') binaries_fname = os.path.join(home_dir, '.pdbremix.config') if not os.path.isfile(binaries_fname): util.write_dict(binaries_fname, binaries) else: binaries = util.read_dict(binaries_fname) def binary(bin, arg_str='', out_name=None, in_fname=None): """ Runs an external binary, handles arguments, writes out equivalent .sh file, log file, and can pipe in in_fname. """ if bin in binaries and binaries[bin]: bin = binaries[bin] else: util.check_program(bin) if arg_str: util.run_with_output_file( '%s %s' % (bin, arg_str), out_name, in_fname) return '"%s"' % bin
import util
from util import read_dict, write_dic
import jieba
import re
import os
from gensim.models import Word2Vec
from gensim.models.ldamodel import LdaModel
from gensim import corpora
from sklearn.cluster import KMeans

if __name__ == "__main__":
    sentence_dict_path = util.txt_prefix + 'id_sentences.pkl'
    if os.path.exists(sentence_dict_path) is False:
        print(sentence_dict_path, 'does not exist')
        exit()
    if os.path.exists(util.txt_prefix + 'id_texts.pkl') is False:
        id_sentence = read_dict(sentence_dict_path)
        print(len(id_sentence))
        id_text = {}
        for i in id_sentence.keys():
            sentence = id_sentence[i]
            temp = ' '.join(sentence)
            # strip separators; （ and ） are full-width parentheses, which
            # appear to have been mangled to ASCII in the original encoding
            temp = re.sub('-|\\)|\\(|（|/|）', ' ', temp).replace('）', '')
            cut_str = jieba.cut(temp)
            text = " ".join(cut_str)
            text = re.sub(r'\s{2,}', ' ', text)
            id_text.setdefault(i, (text.replace('（', '')).split(' '))
        write_dic(id_text, util.txt_prefix + 'id_texts.pkl')
    id_text = read_dict(util.txt_prefix + 'id_texts.pkl')
    texts = list(id_text.values())
    features, words = 60, 14
    if os.path.exists(util.txt_prefix + str(features) + 'features_1minwords_' +
                      str(words) + 'context.pkl') is False:
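# For context, jieba.cut is what turns each joined sentence into a token list
# above: it segments mixed Chinese/English text. A tiny self-contained
# illustration (the sample string is made up; exact splits depend on jieba's
# dictionary):
import jieba

tokens = list(jieba.cut('熟悉Python和机器学习'))
# typically prints something like: 熟悉 Python 和 机器 学习
print(' '.join(tokens))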