def run(opts):
    """Merge masked images into one 2-D array, optionally run FastICA, and save.

    The first entry of ``opts.input`` supplies the image object that is reused
    when writing the result. Every further input is masked with the same mask
    and appended as extra column(s). When ``opts.fastica`` is set the merged
    data are replaced by the z-scaled ICA sources, the scaled mixing
    components are written to ``<sys.argv[4]>.ICA_fit.csv`` and the fitted
    model, sources and mask are stored under ``ICA_temp/``.

    Args:
        opts: Parsed command-line options. Attributes read here: ``voxel``,
            ``vertex``, ``input``, ``output``, ``mask``, ``scale``,
            ``fastica`` and ``bothhemi``.

    The function returns nothing. It calls ``exit()`` when ``ICA_temp``
    already exists.
    """
    # The first input provides the data and the image object used for saving.
    if opts.voxel:
        img, img_data = loadnifti(opts.input[0])
    if opts.vertex:
        img, img_data = loadmgh(opts.input[0])
    outname = opts.output[0]

    if opts.mask:
        if opts.voxel:
            _, data_mask = loadnifti(opts.mask[0])
        if opts.vertex:
            _, data_mask = loadmgh(opts.mask[0])
        mask_index = data_mask > 0.99
    else:
        # No mask supplied: keep every element of the first three axes.
        mask_index = np.ones(img_data.shape[:3], dtype=bool)

    img_data_trunc = img_data[mask_index].astype(np.float32)
    if opts.scale:
        img_data_trunc = zscaler(img_data_trunc.T).T

    # Collect the pieces and stack once instead of re-copying the growing
    # array on every iteration.
    merged = [img_data_trunc]
    for i, image_path in enumerate(opts.input):
        print("merging image %s" % image_path)
        if i == 0:
            continue  # already loaded above
        if opts.voxel:
            _, tempimgdata = loadnifti(image_path)
        if opts.vertex:
            _, tempimgdata = loadmgh(image_path)
        tempimgdata = tempimgdata[mask_index].astype(np.float32)
        if opts.scale:
            tempimgdata = zscaler(tempimgdata.T).T
        merged.append(tempimgdata)
    if len(merged) > 1:
        img_data_trunc = np.column_stack(merged)
    del merged

    # remove nan if any
    img_data_trunc[np.isnan(img_data_trunc)] = 0

    if opts.fastica:
        ica = FastICA(n_components=int(opts.fastica[0]),
                      max_iter=5000,
                      tol=0.0001)
        S_ = ica.fit_transform(img_data_trunc).T
        components = ica.components_.T

        # Scale a copy so the raw sources saved below stay untouched.
        fitcomps = zscaler(np.copy(S_))
        img_data_trunc = np.copy(fitcomps.T)
        # NOTE(review): the CSV name comes from a fixed argv position rather
        # than from `outname`; confirm this matches the command-line layout.
        np.savetxt("%s.ICA_fit.csv" % sys.argv[4],
                   zscaler(components),
                   fmt='%10.8f',
                   delimiter=',')

        # save outputs and ica functions for potential ica removal
        if os.path.exists('ICA_temp'):
            print('ICA_temp directory exists')
            exit()
        else:
            os.makedirs('ICA_temp')
        np.save('ICA_temp/signals.npy', S_)
        with open("ICA_temp/icasave.p", "wb") as ica_file:
            pickle.dump(ica, ica_file)

        if opts.bothhemi:
            # NOTE(review): lh_mask_index, rh_mask_index, lh_img and rh_img are
            # not assigned anywhere in this function; unless they exist at
            # module level this branch raises NameError.
            lh_tempmask = lh_mask_index * 1
            rh_tempmask = rh_mask_index * 1
            savemgh(lh_tempmask[lh_tempmask == 1], lh_img, lh_mask_index,
                    'ICA_temp/lh_mask.mgh')
            savemgh(rh_tempmask[rh_tempmask == 1], rh_img, rh_mask_index,
                    'ICA_temp/rh_mask.mgh')
        else:
            tempmask = mask_index * 1
            if opts.voxel:
                savenifti(tempmask[tempmask == 1], img, mask_index,
                          'ICA_temp/mask.nii.gz')
            else:
                savemgh(tempmask[tempmask == 1], img, mask_index,
                        'ICA_temp/mask.mgh')

    if opts.voxel:
        savenifti(img_data_trunc.astype(np.float32), img, mask_index, outname)
    if opts.vertex:
        savemgh(img_data_trunc.astype(np.float32), img, mask_index, outname)
def run(opts):
    """Run PCA diagnostics and/or FastICA on the first image of a tmi file.

    The first image array of ``opts.tmifile[0]`` is loaded, NaNs are zeroed,
    and the data are optionally residualised against ``dmy_Blocks.csv`` and
    passed through ``zscaler``. With ``opts.pca`` a scree report is printed
    and plotted. With ``opts.fastica`` the ICA is run through ``tmi_run_ica``
    and, if ``opts.timeplot`` is given, per-component time and power-spectrum
    plots are written to a new directory named after ``opts.timeplot[0]``.

    Args:
        opts: Parsed command-line options. Attributes read here: ``tmifile``,
            ``covar``, ``scale``, ``maxnpcacomponents``, ``pca``, ``fastica``,
            ``numicacomponents`` and ``timeplot``.

    The function returns nothing. It calls ``exit()`` when the number of ICA
    components cannot be determined or the time-plot directory exists.
    """
    (_, image_array, masking_array, _, affine_array,
     _, _, _, _, _, _) = read_tm_filetype(opts.tmifile[0])
    img_data_trunc = image_array[0]
    del image_array  # reduce ram usage
    img_data_trunc = img_data_trunc.astype(np.float32)
    img_data_trunc[np.isnan(img_data_trunc)] = 0

    if opts.covar:
        # NOTE(review): the covariate file name is hard-coded and does not
        # depend on the value of opts.covar - confirm this is intended.
        covar = np.genfromtxt('dmy_Blocks.csv', delimiter=',', dtype=float)
        img_data_trunc = lm_residuals(img_data_trunc.T, covar).T
    if opts.scale:
        img_data_trunc = zscaler(img_data_trunc)

    if opts.maxnpcacomponents:
        # Was a chained assignment (`= opts.maxnpcacomponents = [0]`) that set
        # the component count to the list [0].
        numpcacomps = int(opts.maxnpcacomponents[0])
    else:
        numpcacomps = img_data_trunc.shape[1]

    start_comp_number = None
    if opts.pca:
        start_comp_number = _pca_scree_report(img_data_trunc, numpcacomps)

    if opts.fastica:
        if opts.pca:
            num_comp = start_comp_number
        elif opts.numicacomponents:
            num_comp = int(opts.numicacomponents[0])
        else:
            print("unknown number of compenents")
            exit()
        print(num_comp)
        img_data_trunc[np.isnan(img_data_trunc)] = 0
        ica, sort_mask, _ = tmi_run_ica(img_data_trunc,
                                        num_comp,
                                        variance_threshold=.5,
                                        masking_array=masking_array,
                                        affine_array=affine_array,
                                        filetype='mgh',
                                        outname='ica.mgh')
        components = ica.components_.T

        # The time plots need `components` and `sort_mask`, which only exist
        # after the ICA step, so they are produced inside this branch.
        if opts.timeplot:
            components = zscaler(components[:, sort_mask].T).T
            _write_component_timeplots(components, opts.timeplot[0])


def _pca_scree_report(data, n_components):
    """Fit a PCA on *data*, print selection diagnostics and show a scree plot.

    Returns the "threshold" component index: the index of a component whose
    explained variance ratio is below 0.01, stopping at the first such
    component for which the preceding components explain at least 0.80 of the
    variance. If no component is below 0.01 the number of components is
    returned.
    """
    pca = PCA(n_components=n_components)
    pca.fit(data)
    ratios = pca.explained_variance_ratio_
    n_ratios = len(ratios)

    # Fallback keeps the name bound when no component drops below 1%.
    start_comp_number = n_ratios
    for i, ratio in enumerate(ratios):
        if ratio >= 0.01:
            continue
        start_comp_number = i
        explained = ratios[:i].sum()
        print("Component %d explains %1.4f of the variance." % (i, explained))
        if explained >= 0.80:
            break
    print("%d number of components, explaining %1.2f of the variance." %
          (start_comp_number, ratios[:start_comp_number].sum() * 100))

    # Fit a line to the tail of the scree curve starting at each component;
    # the best linear tail is reported as the start of the "noise" components.
    rsquare_scores = []
    w_rsquare_scores = []
    range_comp = np.arange(0, n_components - 2, 1)
    for comp_number in range_comp:
        x = np.arange(comp_number, n_ratios)
        y = ratios[comp_number:]
        _, _, r_value, _, _ = stats.linregress(x, y)
        rsquare_scores.append(r_value**2)
        # Weight the R-square by the relative length of the fitted tail.
        w_rsquare_scores.append(
            r_value**2 *
            ((range_comp[-1] - comp_number + 1) / range_comp[-1]))
    best_comp = np.argmax(rsquare_scores)
    best_comp3 = np.argmax(w_rsquare_scores)
    print(
        "Best Component %d; R-square residual score %1.4f; variance explained %1.4f"
        % (best_comp, rsquare_scores[best_comp], ratios[:best_comp].sum()))

    m, b = np.polyfit(np.arange(best_comp, n_ratios), ratios[best_comp:], 1)

    xaxis = np.arange(ratios.shape[0]) + 1
    plt.plot(xaxis, ratios, 'ro-', linewidth=2)
    markers = (
        (best_comp, 'r', 'Noise; Comp=%d, Sum V(e)=%1.2f'),
        (start_comp_number, 'g', 'Threshold; Comp=%d, Sum V(e)=%1.2f'),
        (best_comp3, 'b', 'Weight Noise; Comp=%d, Sum V(e)=%1.2f'),
    )
    for comp, colour, label in markers:
        plt.axvline(comp, color=colour, linestyle='dashed', linewidth=2)
        plt.text(int(comp - 10),
                 ratios.max() * .99,
                 label % (comp, ratios[:comp].sum()),
                 rotation=90)
    plt.plot(xaxis, m * xaxis + b, '--')
    plt.title('Scree Plot')
    plt.xlabel('Principal Component')
    plt.ylabel('Explained Variance Ratio')
    plt.show()
    return start_comp_number


def _write_component_timeplots(components, analysis_name):
    """Save a time plot and a power spectrum per column of *components*.

    The images are written into a new directory *analysis_name*; ``exit()`` is
    called if that directory already exists.
    """
    subs = np.arange(components.shape[0]) + 1
    time_step = 0.01  # written as a float so it never truncates to 0
    if os.path.exists(analysis_name):
        print('%s directory exists' % analysis_name)
        exit()
    else:
        os.makedirs(analysis_name)

    plt.figure(figsize=(10, 5))
    for i in range(components.shape[1]):
        signal = components[:, i]
        plt.plot(subs, signal, 'ro-', linewidth=2)
        plt.title('Component %d Plot' % (i + 1))
        plt.xlabel('Time or Subject (units)')
        plt.savefig('%s/%s_timeplot_comp%d.jpg' %
                    (analysis_name, analysis_name, (i + 1)))
        plt.clf()

        ps = np.abs(np.fft.fft(signal))**2
        freqs = np.fft.fftfreq(signal.size, time_step)
        idx = np.argsort(freqs)
        plt.plot(np.abs(freqs[idx]), ps[idx])
        plt.title('Component %d Powerspectrum' % (i + 1))
        plt.xlabel('Unit Frequency (Hz / 100)')
        plt.savefig('%s/%s_power_comp%d.jpg' %
                    (analysis_name, analysis_name, (i + 1)))
        plt.clf()
def tmi_run_ica(img_data_trunc,
                num_comp,
                masking_array,
                affine_array,
                variance_threshold=0.9,
                timeplot=False,
                timeplot_name=None,
                filetype='nii.gz',
                outname='ica.nii.gz'):
    """Fit FastICA, report per-component variance and write the results.

    The scaled mixing components are always written to ``ICA_fit.csv``. For
    each component the drop in variance of the back-projected data when that
    component is zeroed is printed as a percentage of the total variance.
    When the summed fraction exceeds *variance_threshold* the z-scaled
    sources are re-ordered by explained variance and saved as image(s), two
    variance CSV files are written, and the fitted model and raw sources are
    stored under ``ICA_temp/``.

    NOTE(review): indentation was lost in the source this was restored from.
    Output writing is assumed to belong to the threshold branch - confirm.

    Args:
        img_data_trunc: 2-D data array passed to ``FastICA.fit_transform``.
        num_comp: Number of ICA components (converted with ``int``).
        masking_array: Sequence of masks, one per output image.
        affine_array: Sequence of affines matching *masking_array*.
        variance_threshold: Minimum summed variance fraction required before
            outputs are written.
        timeplot: Currently unused.
        timeplot_name: Currently unused.
        filetype: ``'nii.gz'`` saves one NIfTI using the first mask/affine;
            any other value saves one mgh file per mask, prefixed by its
            index.
        outname: Output image name.

    Returns:
        Tuple ``(ica, sort_mask, sum_total_variance_explained)``: the fitted
        model, the component indices ordered by decreasing explained
        variance, and the summed fraction of variance explained.
    """
    num_comp = int(num_comp)
    ica = FastICA(n_components=num_comp, max_iter=1000, tol=0.00001)
    S_ = ica.fit_transform(img_data_trunc).T
    components = ica.components_.T

    # Scale a copy so the raw sources in S_ stay available below.
    fitcomps = zscaler(np.copy(S_))
    img_data_trunc = np.copy(fitcomps.T)
    np.savetxt("ICA_fit.csv",
               zscaler(components),
               fmt='%10.8f',
               delimiter=',')

    # Variance explained: compare the full back-projection with the
    # back-projection obtained after zeroing one component at a time.
    explained_total_var = np.zeros(num_comp)
    explained_var_ratio = np.zeros(num_comp)
    total_var = ica.inverse_transform(S_.T).var()
    for i in range(num_comp):
        tempcomps = np.copy(S_)
        tempcomps[i, :] = 0
        temp_var = ica.inverse_transform(tempcomps.T).var()
        explained_var_ratio[i] = total_var - temp_var
        explained_total_var[i] = (total_var - temp_var) / total_var
        print("ICA # %d; Percent of Total Variance %1.3f" %
              ((i + 1), explained_total_var[i] * 100))
    explained_var_ratio = explained_var_ratio / explained_var_ratio.sum()
    sum_total_variance_explained = explained_total_var.sum()
    print("Total variance explained by all components = %1.3f" %
          sum_total_variance_explained)

    print("Re-ordering components")
    # Computed unconditionally because it is part of the return value.
    sort_mask = (-1 * explained_total_var).argsort()

    if sum_total_variance_explained > variance_threshold:
        np.savetxt("ICA_total_var.csv",
                   explained_total_var[sort_mask],
                   fmt='%1.5f',
                   delimiter=',')
        np.savetxt("ICA_explained_var_ratio.csv",
                   explained_var_ratio[sort_mask],
                   fmt='%1.5f',
                   delimiter=',')
        img_data_trunc = img_data_trunc[:, sort_mask]

        if filetype == 'nii.gz':
            savenifti_v2(img_data_trunc, masking_array[0], outname,
                         affine_array[0])
        else:
            # Rows are laid out mask after mask; slice out each mask's rows.
            start = 0
            for i, mask in enumerate(masking_array):
                # Elementwise comparison kept as in the original selection.
                end = start + int(np.count_nonzero(mask == True))  # noqa: E712
                savemgh_v2(img_data_trunc[start:end], mask,
                           "%d_%s" % (i, outname), affine_array[i])
                start = end

        # save outputs and ica functions for potential ica removal
        if os.path.exists('ICA_temp'):
            print('ICA_temp directory exists')
            exit()
        else:
            os.makedirs('ICA_temp')
        np.save('ICA_temp/signals.npy', S_)
        with open("ICA_temp/icasave.p", "wb") as ica_file:
            pickle.dump(ica, ica_file)

    return ica, sort_mask, sum_total_variance_explained
def run(opts):
    """Create a tmi file, or append to one, from images, text and surfaces.

    Each enabled input option adds entries to the image, mask, affine,
    surface or adjacency lists, which are then written with
    ``write_tm_filetype``. With ``opts.append`` the lists are first populated
    from the existing file, which is also the output file.

    Args:
        opts: Parsed command-line options. Attributes read here:
            ``outputname``, ``outputtype``, ``append``, ``inputimages``,
            ``inputmasks``, ``concatenateimages``, ``concatenatename``,
            ``scale``, ``inputtext``, ``concatenatetext``,
            ``concatenatebinary``, ``inputfreesurfer``, ``inputgifti``,
            ``inputmniobj``, ``inputply`` and ``inputadjacencyobject``.

    The function returns nothing. It calls ``exit()`` on a mask/image shape
    mismatch, when more than one mask is given with ``concatenateimages``, or
    when the adjacency count does not fit the number of masks.
    """
    image_array = []
    affine_array = []
    masking_array = []
    maskname = []
    vertex_array = []
    face_array = []
    surfname = []
    adjacency_array = []
    tmi_history = []

    if opts.outputname:
        outname = opts.outputname[0]
        if not outname.endswith('tmi'):
            if opts.outputtype == 'binary':
                outname += '.tmi'
            else:
                outname += '.ascii.tmi'
    if opts.append:
        outname = opts.append[0]
        (_, image_array, masking_array, maskname, affine_array, vertex_array,
         face_array, surfname, adjacency_array, tmi_history,
         _) = read_tm_filetype(outname)

    if opts.inputimages:
        for i, image_path in enumerate(opts.inputimages):
            basename, file_ext = os.path.splitext(image_path)
            compressed = file_ext == '.gz'
            if compressed:
                # Work on a temporary decompressed copy next to the input.
                _gunzip(image_path, basename)
            try:
                img = nib.load(basename if compressed else image_path)
                img_data = _image_data(img)
                if opts.inputmasks:
                    mask_data = _image_data(nib.load(opts.inputmasks[i]))
                    if not np.array_equal(img_data.shape[:3],
                                          mask_data.shape[:3]):
                        print("Error mask data dimension do not fit image dimension")
                        exit()
                    mask_data = mask_data == 1
                    img_data = img_data[mask_data]
                else:
                    img_data, mask_data = maskdata(img_data)
                masking_array.append(np.array(mask_data))
                image_array.append(np.array(img_data))
                affine_array.append(img.affine)
                maskname.append(np.array(os.path.basename(image_path)))
            finally:
                # Also runs on exit() so the temporary copy is never left.
                if compressed:
                    os.remove(basename)

    if opts.concatenateimages:
        img = nib.load(opts.concatenateimages[0])
        img_data = _image_data(img)
        if opts.inputmasks:
            if not len(opts.inputmasks) == 1:
                print("Only one mask can be added using concatenate. See help (hint: rerun using append option for multiple modalities/surfaces)")
                exit()
            mask_data = _image_data(nib.load(opts.inputmasks[0]))
            if not np.array_equal(img_data.shape[:3], mask_data.shape[:3]):
                print("Error mask data dimension do not fit image dimension")
                exit()
            mask_data = mask_data == 1
            img_data = img_data[mask_data].astype(np.float32)
        else:
            print("Creating mask from first image.")
            img_data, mask_data = maskdata(img_data)
        columns = []
        for i, image_path in enumerate(opts.concatenateimages):
            print("merging image %s" % image_path)
            if i > 0:
                tempdata = _image_data(nib.load(image_path))
                tempdata = tempdata[mask_data].astype(np.float32)
            else:
                tempdata = img_data
            if opts.scale:
                tempdata = zscaler(tempdata.T).T
            columns.append(tempdata)
        img_data = _stack_columns(columns)
        masking_array.append(np.array(mask_data))
        image_array.append(np.array(img_data))
        affine_array.append(img.affine)
        if opts.concatenatename:
            maskname.append(
                np.array(os.path.basename(opts.concatenatename[0])))

    if opts.inputtext:
        for text_path in opts.inputtext:
            # Manual parsing: np.genfromtxt is slower and uses more ram.
            with open(text_path) as data_file:
                rows = [line.strip().split(',') for line in data_file]
            img_data = np.array(rows).astype(np.float32)
            img_data, mask_data = maskdata(img_data)
            masking_array.append(np.array(mask_data))
            image_array.append(np.array(img_data))

    if opts.concatenatetext:
        # The first file used to be loaded into a separate name and never
        # stacked, so merging hit an undefined (or stale) img_data.
        columns = []
        for text_path in opts.concatenatetext:
            print("merging text file %s" % text_path)
            columns.append(np.genfromtxt(text_path, delimiter=','))
        img_data, mask_data = maskdata(_stack_columns(columns))
        masking_array.append(np.array(mask_data))
        image_array.append(np.array(img_data))

    if opts.concatenatebinary:
        # Same fix as for concatenatetext: start from the first file.
        columns = []
        for binary_path in opts.concatenatebinary:
            print("merging simple float binary file %s" % binary_path)
            columns.append(np.fromfile(binary_path, dtype='f'))
        img_data, mask_data = maskdata(_stack_columns(columns))
        masking_array.append(np.array(mask_data))
        image_array.append(np.array(img_data))

    # Surfaces: every format yields a (vertices, faces) pair.
    surface_inputs = (
        (opts.inputfreesurfer, convert_fs),
        (opts.inputgifti, convert_gifti),
        (opts.inputmniobj, convert_mni_object),
        (opts.inputply, convert_ply),
    )
    for surface_paths, converter in surface_inputs:
        if not surface_paths:
            continue
        for surface_path in surface_paths:
            v, f = converter(str(surface_path))
            vertex_array.append(v)
            face_array.append(f)
            surfname.append(np.array(os.path.basename(surface_path)))

    if opts.inputadjacencyobject:
        for adjacency_path in opts.inputadjacencyobject:
            adjacency_array.append(np.load(str(adjacency_path)))
        n_adjacency = len(adjacency_array)
        n_masks = len(masking_array)
        if n_adjacency != n_masks:
            # The branches used to be swapped: a divisible count aborted with
            # the "not divisable" message. A whole multiple only warns now;
            # anything else, including having no masks at all, aborts.
            if n_masks > 0 and n_adjacency % n_masks == 0:
                print("Number of adjacency objects does not match number of images.")
            else:
                print("Error number of adjacency objects is not divisable by the number of masking arrays.")
                exit()

    # Write tmi file
    tmi_kwargs = dict(output_binary=opts.outputtype == 'binary',
                      masking_array=masking_array,
                      maskname=maskname,
                      affine_array=affine_array,
                      vertex_array=vertex_array,
                      face_array=face_array,
                      surfname=surfname,
                      adjacency_array=adjacency_array,
                      checkname=False,
                      tmi_history=tmi_history)
    if len(image_array) > 0:
        tmi_kwargs['image_array'] = np.vstack(image_array)
    write_tm_filetype(outname, **tmi_kwargs)


def _image_data(img):
    """Return the data array of a nibabel image without changing its dtype."""
    # Documented replacement for the removed ``img.get_data()``.
    return np.asanyarray(img.dataobj)


def _gunzip(src_path, dst_path):
    """Decompress the gzip file *src_path* into *dst_path*."""
    import gzip
    import shutil

    with gzip.open(src_path, 'rb') as src, open(dst_path, 'wb') as dst:
        shutil.copyfileobj(src, dst)


def _stack_columns(arrays):
    """Column-stack *arrays* in one call; a single array is returned as is."""
    if len(arrays) == 1:
        return arrays[0]
    return np.column_stack(arrays)