def install(robustus, requirement_specifier, rob_file, ignore_index):
    """Build pocketsphinx from source and install it into the virtualenv.

    Does nothing when ``pocketsphinx.so`` is already present in the
    environment's site-packages. Otherwise the sources are downloaded and
    unpacked into ``robustus.cache`` (unless a ``configure`` script is
    already there), then configured, built and installed with
    ``robustus.env`` as prefix. Finally a ``wrap_pocketsphinx.py`` helper
    module is written next to the installed library.

    Args:
        robustus: object providing ``env``, ``cache``, ``settings`` and
            ``download``.
        requirement_specifier: object whose ``version`` selects both the
            pocketsphinx and the sphinxbase directories.
        rob_file: unused here.
        ignore_index: unused here.

    Raises:
        RequirementException: if configure, build or install returns a
            non-zero exit code. The build directory is removed before the
            exception is propagated.
    """
    # check if already installed
    pocketsphinx = os.path.join(robustus.env, 'lib/python2.7/site-packages/pocketsphinx.so')
    if os.path.isfile(pocketsphinx):
        return

    cwd = os.getcwd()
    archive = None
    build_dir = os.path.join(robustus.cache, 'pocketsphinx-%s' % requirement_specifier.version)
    verbose = robustus.settings['verbosity'] >= 1
    try:
        os.chdir(robustus.cache)
        if not os.path.isfile(os.path.join(build_dir, 'configure')):
            archive = robustus.download('pocketsphinx', requirement_specifier.version)
            unpack(archive)

        # unfortunately we can't cache pocketsphinx, it has to be rebuilt
        # after reconfigure
        logging.info('Building pocketsphinx')
        os.chdir(build_dir)
        sphinxbase_dir = os.path.join(robustus.cache, 'sphinxbase-%s/' % requirement_specifier.version)
        retcode = run_shell('./configure'
                            + (' --prefix=%s' % robustus.env)
                            + (' --with-python=%s' % os.path.join(robustus.env, 'bin/python'))
                            + (' --with-sphinxbase=%s' % sphinxbase_dir)
                            + (' --with-sphinxbase-build=%s' % sphinxbase_dir),
                            shell=True,
                            verbose=verbose)
        if retcode != 0:
            raise RequirementException('pocketsphinx configure failed')

        retcode = run_shell('make clean && make', shell=True, verbose=verbose)
        if retcode != 0:
            raise RequirementException('pocketsphinx build failed')

        logging.info('Installing pocketsphinx into virtualenv')
        retcode = run_shell('make install', shell=True, verbose=verbose)
        if retcode != 0:
            raise RequirementException('pocketsphinx install failed')

        fix_rpath(robustus, robustus.env, pocketsphinx, os.path.join(robustus.env, 'lib'))

        # there is a super weird bug, first import of pocketsphinx fails
        # http://sourceforge.net/p/cmusphinx/bugs/284/
        # so the wrapper module tries the import twice.
        write_file(os.path.join(robustus.env, 'lib/python2.7/site-packages/wrap_pocketsphinx.py'),
                   'w',
                   'try:\n'
                   + ' from pocketsphinx import *\n'
                   + 'except:\n'
                   + ' pass\n'
                   + 'from pocketsphinx import *\n')
    except RequirementException:
        # Drop the half-built tree, then let the failure reach the caller
        # instead of reporting a broken install as a success.
        safe_remove(build_dir)
        raise
    finally:
        if archive is not None:
            safe_remove(archive)
        os.chdir(cwd)
def install(robustus, requirement_specifier, rob_file, ignore_index):
    """Expose the system-wide PyGtk inside the robustus virtualenv.

    Every required PyGtk file or directory is linked from the system
    dist-packages directory into the environment's site-packages, and a
    ``pygtk.pth`` file containing the ``gtk-2.0`` path is written if it does
    not exist yet.

    Raises:
        RequirementException: if the system has no ``pygtk.py`` or any of
            the required entries is missing.
    """
    # Softlinking to existing PyGtk. TODO: install via configure
    if not os.path.isfile('/usr/lib/python2.7/dist-packages/pygtk.py'):
        raise RequirementException('System-wide PyGtk is missing')

    logging.info('Linking pygtk')
    site_packages_dir = os.path.join(robustus.env, 'lib/python2.7/site-packages')
    required = ('pygtk.py', 'pygtk.pyc', 'gtk-2.0', 'glib', 'gobject', 'cairo')
    for entry in required:
        system_path = os.path.join('/usr/lib/python2.7/dist-packages', entry)
        if not os.path.exists(system_path):
            raise RequirementException(
                'Required packages for system-wide PyGtk missing, %s not found' % entry)
        ln(system_path, os.path.join(site_packages_dir, entry), force=True)

    pygtk_pth = os.path.join(site_packages_dir, 'pygtk.pth')
    if os.path.exists(pygtk_pth):
        return
    gtk_dir = os.path.join(site_packages_dir, 'gtk-2.0')
    write_file(os.path.join(pygtk_pth), 'w', gtk_dir)
def install(robustus, requirement_specifier, rob_file, ignore_index):
    """Expose the system-wide pygst 0.10 inside the robustus virtualenv.

    Links the required pygst files and directories from the system
    dist-packages directory into the environment's site-packages and writes
    a ``pygst.pth`` file containing the ``gst-0.10`` path.

    Raises:
        RequirementException: if the requested version is not ``'0.10'``,
            the system has no ``pygst.py``, or a required entry is missing.
    """
    if requirement_specifier.version != '0.10':
        raise RequirementException('Only 0.10 version of pygst is supported')

    # Softlinking to existing gst
    if not os.path.isfile('/usr/lib/python2.7/dist-packages/pygst.py'):
        raise RequirementException('System-wide pygst is missing')

    logging.info('Linking pygst')
    site_packages_dir = os.path.join(robustus.env, 'lib/python2.7/site-packages')
    for entry in ('pygst.py', 'gst-0.10', 'gstoption.so', 'glib', 'gobject'):
        system_path = os.path.join('/usr/lib/python2.7/dist-packages', entry)
        if not os.path.exists(system_path):
            raise RequirementException(
                'Required packages for system-wide pygst missing, %s not found' % entry)
        ln(system_path, os.path.join(site_packages_dir, entry), force=True)

    pth_file = os.path.join(site_packages_dir, 'pygst.pth')
    write_file(pth_file, 'w', os.path.join(site_packages_dir, 'gst-0.10'))
def install(robustus, requirement_specifier, rob_file, ignore_index):
    """Link the system pygst 0.10 installation into the virtualenv.

    Only version ``'0.10'`` is accepted. Each required entry is linked from
    the system dist-packages directory into the environment's site-packages
    (replacing existing links), then ``pygst.pth`` is written with the
    ``gst-0.10`` directory as its content.

    Raises:
        RequirementException: on an unsupported version, a missing system
            ``pygst.py``, or a missing required entry.
    """
    supported_version = requirement_specifier.version == '0.10'
    if not supported_version:
        raise RequirementException('Only 0.10 version of pygst is supported')

    # Softlinking to existing gst
    have_system_pygst = os.path.isfile('/usr/lib/python2.7/dist-packages/pygst.py')
    if not have_system_pygst:
        raise RequirementException('System-wide pygst is missing')

    logging.info('Linking pygst')
    site_packages_dir = os.path.join(robustus.env, 'lib/python2.7/site-packages')
    wanted = ['pygst.py', 'gst-0.10', 'gstoption.so', 'glib', 'gobject']
    for name in wanted:
        origin = os.path.join('/usr/lib/python2.7/dist-packages', name)
        if os.path.exists(origin):
            ln(origin, os.path.join(site_packages_dir, name), force=True)
        else:
            raise RequirementException(
                'Required packages for system-wide pygst missing, %s not found' % name)

    write_file(os.path.join(site_packages_dir, 'pygst.pth'),
               'w',
               os.path.join(site_packages_dir, 'gst-0.10'))
def install(robustus, requirement_specifier, rob_file, ignore_index):
    """Link the system PyGtk installation into the virtualenv.

    Each required entry is linked from the system dist-packages directory
    into the environment's site-packages (replacing existing links). A
    ``pygtk.pth`` file holding the ``gtk-2.0`` directory is created only
    when no such file exists.

    Raises:
        RequirementException: if the system ``pygtk.py`` or any required
            entry cannot be found.
    """
    # Softlinking to existing PyGtk. TODO: install via configure
    have_system_pygtk = os.path.isfile('/usr/lib/python2.7/dist-packages/pygtk.py')
    if not have_system_pygtk:
        raise RequirementException('System-wide PyGtk is missing')

    logging.info('Linking pygtk')
    site_packages_dir = os.path.join(robustus.env, 'lib/python2.7/site-packages')
    wanted = ['pygtk.py', 'pygtk.pyc', 'gtk-2.0', 'glib', 'gobject', 'cairo']
    for name in wanted:
        origin = os.path.join('/usr/lib/python2.7/dist-packages', name)
        if os.path.exists(origin):
            ln(origin, os.path.join(site_packages_dir, name), force=True)
        else:
            raise RequirementException(
                'Required packages for system-wide PyGtk missing, %s not found' % name)

    pygtk_pth = os.path.join(site_packages_dir, 'pygtk.pth')
    if not os.path.exists(pygtk_pth):
        write_file(os.path.join(pygtk_pth),
                   'w',
                   os.path.join(site_packages_dir, 'gtk-2.0'))
def run_cluster(all_files, param_dict, out_file, verbose_level):
    """Run ``sermons_cluster`` ``param_dict['num_runs']`` times and average it.

    When ``out_file`` is given, a timestamp header is first written to
    ``<out_file>_cluster.txt``. With ``verbose_level > 0`` the overall
    average is printed, or written to that same file when ``out_file`` is
    set.

    Returns:
        The sum of all run results divided by ``param_dict['num_runs']``.
    """
    report_path = None if out_file is None else out_file + "_cluster.txt"

    if report_path is not None:
        write_file(report_path, "\n\n\n" + time.ctime() + "\n===\n")

    total = sum(
        sermons_cluster(all_files, param_dict, out_file, verbose_level)
        for _ in range(0, param_dict['num_runs'])
    )
    overall_avg = total / param_dict['num_runs']

    if verbose_level > 0:
        summary = "overall average: {}".format(overall_avg)
        if report_path is None:
            print(summary)
        else:
            write_file(report_path, summary)

    return overall_avg
def generate(traces_path, comb1_path, comb2_path, comb3_path):
    """Write one feature file per combination for the traces in a file.

    The three combinations come from ``extract_dict(traces_path)``. Each
    output starts with a ``"combination ..."`` header line listing the
    combination, followed by one line per non-empty trace line: the text
    before the first ``":"`` of the trace, a space, and the result of
    ``convert_to_line`` for that combination.

    Args:
        traces_path: path of the traces file to read.
        comb1_path: output path for the first combination.
        comb2_path: output path for the second combination.
        comb3_path: output path for the third combination.
    """
    comb1, comb2, comb3 = extract_dict(traces_path)
    traces = read_file(traces_path).split('\n')

    ret1 = ["combination " + " ".join(comb1)]
    ret2 = ["combination " + " ".join(comb2)]
    ret3 = ["combination " + " ".join(comb3)]

    for line in traces:
        # Skip blank lines. Compare by value: an identity test against a
        # string literal only works by interning accident.
        if not line:
            continue
        dict1, dict2, dict3 = extract_feature(comb1, comb2, comb3, line)
        prefix = line.split(":")[0] + " "
        line1 = prefix + convert_to_line(comb1, dict1)
        line2 = prefix + convert_to_line(comb2, dict2)
        line3 = prefix + convert_to_line(comb3, dict3)
        ret1.append(line1)
        ret2.append(line2)
        ret3.append(line3)

    # NOTE: this write_file takes (text, path), in that order.
    write_file("\n".join(ret1), comb1_path)
    write_file("\n".join(ret2), comb2_path)
    write_file("\n".join(ret3), comb3_path)
def main():
    """Process the sample batch-upload job named by ``sys.argv[1]``.

    Steps:
      1. Fetch the job's XLSX blob and write it to a temporary file.
      2. Walk the data rows (from sheet row 2) of the
         ``sample_batch_upload_example`` worksheet. Rows whose sample
         already exists, or for which ``processSample`` returns a warning,
         are skipped and collected as warnings. The first error, or the
         first row missing a required cell, stops the batch.
      3. For each accepted row, insert/update the patient, sample and
         enroll-study records and record a success result.
      4. Store the accumulated error/warning results, delete the temporary
         XLSX file and write the ``tmp_sam.txt`` marker file.
    """
    job_id = sys.argv[1]

    ###################################################
    # Read Sample Blob from RemoteR DB into XLSX file
    ###################################################
    tmp_xlsx_filename = 'temp_excel.xlsx'
    sample_ws_name = 'sample_batch_upload_example'
    try:
        print('Read Excel blob from remote r db')
        excel = getXLSXBlobInDB(job_id = job_id,tag = '1',data_type = 'sample')
        print('Write Excel blob to file in the disk')
        write_file(excel, tmp_xlsx_filename)
    except Error as error:
        print(error)
    finally:
        print('Finish writing excel file')

    ###################################################
    # Read Sample Data from XLSX file into Memory
    ###################################################
    wb = load_workbook(tmp_xlsx_filename)
    ws = wb[sample_ws_name]

    existedUUIDs = ""
    warnMsg = ""
    isError = False
    errorMsg = ""

    # rowNum counts data rows starting at 1 (the sheet's header row is
    # skipped). enumerate keeps it in step with the sheet even for rows
    # that are skipped with `continue`.
    for rowNum, row in enumerate(ws.iter_rows(min_row=2), start=1):
        # A sample dictionary which stores the validated and converted cell
        # values of each sample row
        sample_data = {}

        # required cells
        study_name = row[0].value
        local_sample_id = row[3].value
        local_patient_id = row[4].value

        if (study_name is None) or (local_sample_id is None) or (local_patient_id is None):
            # A row without the required cells ends the batch.
            break

        existed_sam_uuid = getSampleUUIDInDBbyLocalSamID(local_sample_id = local_sample_id)
        if existed_sam_uuid is not None:
            # Messages are accumulated by concatenation; str.join would
            # interleave the old text between the new message's characters.
            warnMsg += "Row number: %i, local sample id %s has already existed in database.\n" %(rowNum, local_sample_id)
            if existedUUIDs == "":
                existedUUIDs += existed_sam_uuid
            else:
                existedUUIDs += "," + existed_sam_uuid
            continue

        colNum = 0
        for cell in row:
            unic = u"%s" %(cell.value)
            w = unic.encode('utf-8', errors='replace')
            if isValidDataFormat(colNum, w):
                if colNum in codeDic:
                    finalW = convertStrToCode(colNum, w)
                else:
                    finalW = w
                # Empty cells were stringified to "None" above.
                if finalW == "None":
                    finalW = None
                sample_data[sampleVariableDic[colNum]] = finalW
                colNum += 1
            else:
                isError = True
                errorMsg += "Row number: %i, Col number: %i, wrong data format.\n" %(rowNum, colNum)
                break
        if isError:
            # A badly formatted cell invalidates the row; stop the batch
            # rather than processing partially filled sample_data.
            break

        result_sam = processSample(rowNum, sample_data)
        warn_sam = result_sam["Warn_Msg"]
        if warn_sam is not None:
            warnMsg += warn_sam
            if existedUUIDs == "":
                existedUUIDs += result_sam["Existed_Sam_UUID"]
            else:
                existedUUIDs += "," + result_sam["Existed_Sam_UUID"]
            continue
        error_sam = result_sam["Error_Msg"]
        if error_sam is not None:
            isError = True
            errorMsg += error_sam
            break

        ############################################################
        # Update or Insert Patient, Sample, EnrollStudy data into DB
        ############################################################
        # process Patient record
        # initial result_pat = {"Error_Msg": None, "Patient_ID": None,
        #                       "Pat_Ops": None, "EnrollStudy_Ops": None}
        result_pat = processPatient(rowNum, sample_data)
        error_pat = result_pat["Error_Msg"]
        if error_pat is not None:
            isError = True
            errorMsg += error_pat
            break

        is_new_patient = False
        pat_ops = result_pat["Pat_Ops"]
        pat_id = result_pat["Patient_ID"]
        if pat_ops == "insert":
            is_new_patient = True
            addPatientInDB(pat_id, local_patient_id)
        if pat_ops == "update":
            is_success = updateLocalPatientIDInDB(local_patient_id, pat_id)
            if not is_success:
                isError = True
                errorMsg += "Please contact developer. Fail to update local_patient_id into database based on your input."
                break

        # process Sample record
        # initial result_sam = {"Error_Msg": None, "Warn_Msg": None,
        #                       "Existed_Sam_UUID": None, "Sam_Ops": None,
        #                       "EnrollStudy_Ops": None}
        sam_ops = result_sam["Sam_Ops"]
        if sam_ops == "insert":
            sample_data["UUID"] = generate36CharUUID()
            sample_data["Patient_ID"] = pat_id
            if sample_data["Pathological_Status"] is None:
                sample_data["Pathological_Status"] = 99
            if sample_data["Sample_Class"] is None:
                sample_data["Sample_Class"] = 99
            sample_data["Sample_ID"] = generateSampleID(pat_id, sample_data["Pathological_Status"], sample_data["Sample_Class"])
            addSampleInDB(sample_data)

        # process Enroll Study record
        # pat_enrollstudy_ops:
        #   no_action
        #   update pat_id, src_sam_id, UUID where src_id,src_pat_id
        #   insert src_id, src_pat_id, pat_id, src_sam_id, UUID
        pat_enrollstudy_ops = result_pat["EnrollStudy_Ops"]
        # sam_enrollstudy_ops:
        #   update src_pat_id, pat_id, UUID where src_id, src_sam_id
        #   insert src_id, src_pat_id, pat_id, src_sam_id, UUID
        sam_enrollstudy_ops = result_sam["EnrollStudy_Ops"]

        enroll_study_data = {'Study_ID': sample_data['Study_ID'],
                             'Patient_ID': pat_id,
                             'Within_Study_Patient_ID': sample_data['Within_Study_Patient_ID'],
                             'Sample_UUID': sample_data["UUID"],
                             'Within_Study_Sample_ID': sample_data['Within_Study_Sample_ID']}

        is_new_enroll_study = False
        last_enroll_study_id = None
        if (pat_enrollstudy_ops == "insert") and (sam_enrollstudy_ops == "insert"):
            is_new_enroll_study = True
            last_enroll_study_id = addEnrollStudyInDB(enroll_study_data)
        elif sam_enrollstudy_ops == "update":
            is_success = updateEnrollStudyInDB(enroll_study_data, by_variable = "Within_Study_Sample_ID")
            if not is_success:
                isError = True
                errorMsg += "Please contact developer. Fail to update new enroll study into database based on your input."
                break
        else:
            # sam_enrollstudy_ops == "insert" and pat_enrollstudy_ops is
            # "update" or "no_action"
            is_success = updateEnrollStudyInDB(enroll_study_data, by_variable = "Within_Study_Patient_ID")
            if not is_success:
                isError = True
                errorMsg += "Please contact developer. Fail to update new enroll study into database based on your input."
                break

        addSucessResultIntoRemoteRDB(job_id = job_id, sample_uuid = sample_data["UUID"] , is_new_patient = is_new_patient, patient_id = pat_id, is_new_enroll_study = is_new_enroll_study, enroll_study_id = last_enroll_study_id)

    # 10/15/2018 - Currently results are stored in RemoteR database (for
    # Python code - handle XLSX). Previously stored in SMS database (for
    # previous version PHP code - handle CSV).
    if isError:
        addErrorResultIntoRemoteRDB(job_id = job_id, msg = errorMsg)

    if warnMsg != "":
        addWarnResultIntoRemoteRDB(job_id = job_id, msg = warnMsg, existed_sam_uuids = existedUUIDs)

    os.remove(tmp_xlsx_filename)

    # 10/16/2018 - generate a success marker file and let the perl script
    # detect it.
    # p.s. Which one is faster? 1) Let perl script detect the output file OR
    # 2) query remoter db for the result multiple times until ~ 10 sec?
    write_file("success","tmp_sam.txt")
""" main.py is the main script for running and interacting with the ETS twitter. """ import utility tweets = utility.get_tweets(utility.login(), num=100) print "NEW TWEETS\n\n\n" print "MARKOV" utility.write_file(tweets)
def format_output(all_files, cluster_list, num_k, kmeans_out, out_file,
                  verbose_level):
    """Summarise how the parts of each sermon were spread over the clusters.

    File names are expected to look like ``<dir>/<sermon>_part_<n>.<ext>``.
    Each sermon is assigned to the cluster holding most of its parts, and
    the share of its parts in that cluster is its score. The report lists,
    per cluster, the sermons assigned to it with their theme, score and
    per-cluster part numbers, plus the cluster's average score.

    The report (and ``kmeans_out`` when ``verbose_level > 0``) is printed,
    or written to ``<out_file>_cluster.txt`` when ``out_file`` is given.

    Args:
        all_files: object with ``filenames``, ``target`` and
            ``target_names`` sequences.
        cluster_list: cluster number of each file, parallel to
            ``all_files.filenames``.
        num_k: number of clusters.
        kmeans_out: text emitted first when ``verbose_level > 0``.
        out_file: output path prefix, or ``None`` to print.
        verbose_level: verbosity switch for ``kmeans_out``.

    Returns:
        The mean of the average scores of all non-empty clusters.
    """
    report_path = None if out_file is None else out_file + "_cluster.txt"

    def emit(text):
        # Route text to stdout or to the report file.
        if report_path is None:
            print(text)
        else:
            write_file(report_path, text)

    if verbose_level > 0:
        emit(kmeans_out)

    # sermon -> {cluster: [number of parts, [part numbers]]}
    # ex: 'pure in heart': {0: [0, []], 1: [2, [10, 15]], ...}
    tally_by_sermon = {}
    # sermon -> theme name
    theme_by_sermon = {}

    for idx, path in enumerate(all_files.filenames):
        # file name without directory and extension, e.g. true_saints_part_3
        stem = path[path.rfind("/") + 1:path.rfind(".")]
        # name of the undivided sermon, e.g. true_saints
        sermon = stem[:stem.rfind("_part")]
        # which part this file is, e.g. 3
        part_no = int(stem[stem.rfind("_") + 1:])
        label = cluster_list[idx]

        if sermon in tally_by_sermon:
            # another part of a sermon we already know: bump the count of
            # its cluster and remember the part number
            slot = tally_by_sermon[sermon][label]
            slot[0] += 1
            slot[1].append(part_no)
        else:
            # first part seen for this sermon: start every cluster at zero,
            # then record this part in its cluster
            tally = {k: [0, []] for k in range(0, num_k)}
            tally[label] = [1, [part_no]]
            tally_by_sermon[sermon] = tally
            # keep the theme of the sermon for the report
            theme_by_sermon[sermon] = all_files.target_names[
                all_files.target[idx]]

    members_by_cluster = {k: [] for k in range(0, num_k)}
    best_len = {}
    total_len = {}

    for sermon, tally in tally_by_sermon.items():
        # clusters ordered by how many parts of this sermon they hold
        ranked = sorted(tally.items(),
                        key=lambda entry: entry[1][0],
                        reverse=True)
        top_cluster, top_slot = ranked[0]
        best_len[sermon] = top_slot[0]
        total_len[sermon] = top_slot[0] + sum(
            entry[1][0] for entry in ranked[1:num_k])
        members_by_cluster[top_cluster].append([sermon, ranked])

    # A cluster can end up without any sermon when the majority vote never
    # picks it; such clusters are marked with -1 and left out of the mean.
    average_list = []
    for k in range(0, num_k):
        members = members_by_cluster[k]
        if not members:
            average_list.append(-1)
            continue
        score_sum = 0
        for member in members:
            score_sum += (best_len[member[0]] /
                          total_len[member[0]]) * 100
        average_list.append(score_sum / len(members))

    true_average_list = [avg for avg in average_list if avg != -1]
    overall_average = sum(true_average_list) / float(len(true_average_list))

    pieces = [">> total average: {0:.2f}%\n".format(overall_average)]
    for k in range(0, num_k):
        pieces.append('[Cluster {}]\n'.format(k))
        for sermon, ranked in members_by_cluster[k]:
            pieces.append('\t' + sermon + ' (' + theme_by_sermon[sermon] + ')')
            pieces.append("({0:.2f}%)\n".format(
                (best_len[sermon] / total_len[sermon]) * 100))
            for cluster_no, slot in ranked:
                pieces.append('\t - Cluster {}: ['.format(cluster_no))
                for part in sorted(slot[1]):
                    pieces.append('part {} '.format(part))
                pieces.append(']\n')
            pieces.append("\n")
        if average_list[k] != -1:
            pieces.append(" > average: {0:.2f}%\n\n".format(average_list[k]))
        else:
            pieces.append(" > average: N/A\n\n")

    emit("".join(pieces))
    return overall_average
def install(robustus, requirement_specifier, rob_file, ignore_index):
    """Build panda3d into the robustus cache and install it into the virtualenv.

    Only version ``'1.8.1'`` and versions starting with ``'bc'`` are
    accepted. Unless the cache already holds ``lib/panda3d.py`` (or
    ``ignore_index`` is set), the sources are downloaded, built with
    ``makepanda`` and the build products copied into the cache. The cached
    build is then copied into ``<env>/lib/panda3d`` and
    ``<env>/etc/panda3d``, library rpaths are fixed, a ``panda3d.pth`` file
    is written and extra options are appended to ``Config.prc``.

    Raises:
        RequirementException: on an unsupported version or platform, a
            failed build, or when the cache holds no panda3d afterwards.
    """
    version = requirement_specifier.version
    if not (version == '1.8.1' or version.startswith('bc')):
        raise RequirementException('can only install panda3d 1.8.1/bc1/bc2')

    panda_install_dir = os.path.join(robustus.cache, 'panda3d-%s' % requirement_specifier.version)

    def in_cache():
        marker = os.path.join(panda_install_dir, 'lib/panda3d.py')
        return os.path.isfile(marker)

    if not in_cache() and not ignore_index:
        cwd = os.getcwd()
        panda3d_tgz = None
        panda3d_archive_name = None
        try:
            panda3d_tgz = robustus.download('panda3d', requirement_specifier.version)
            panda3d_archive_name = unpack(panda3d_tgz)

            logging.info('Builduing panda3d')
            os.chdir(panda3d_archive_name)

            # link bullet into panda dependencies dir
            bullet_installations = glob.glob(os.path.join(robustus.env, 'lib/bullet-*'))
            if bullet_installations:
                bullet_dir = bullet_installations[0]
                if sys.platform.startswith('darwin'):
                    panda_thirdparty_dir = 'thirdparty/darwin-libs-a'
                elif sys.platform.startswith('linux'):
                    panda_thirdparty_dir = 'thirdparty/linux-libs-x64'
                else:
                    raise RequirementException('unsupported platform ' + sys.platform)
                bullet_target = os.path.join(panda_thirdparty_dir, 'bullet')
                for new_dir in ('thirdparty', panda_thirdparty_dir, bullet_target):
                    os.mkdir(new_dir)
                ln(os.path.join(bullet_dir, 'include/bullet'),
                   os.path.join(panda_thirdparty_dir, 'bullet/include'))
                ln(os.path.join(bullet_dir, 'lib'),
                   os.path.join(panda_thirdparty_dir, 'bullet/lib'))

            make_panda_options = [
                '--nothing',
                '--use-python',
                '--use-direct',
                '--use-bullet',
                '--use-zlib',
                '--use-png',
                '--use-jpeg',
                '--use-tiff',
                '--use-freetype',
                '--use-x11',
                '--use-gl',
                '--use-nvidiacg',
                '--use-pandatool',
                '--use-tinydisplay',
                '--threads', '4',
            ]
            if sys.platform.startswith('darwin'):
                make_panda_options += ['--use-cocoa']
                os.environ['CC'] = 'gcc'
                os.environ['CXX'] = 'g++'

            makepanda_cmd = [robustus.python_executable, 'makepanda/makepanda.py'] + make_panda_options

            # command takes much time and output very long, so run_shell
            # isn't used
            retcode = subprocess.call(makepanda_cmd)
            if retcode != 0:
                raise RequirementException('panda3d build failed')

            # copy panda3d files to cache
            shutil.rmtree(panda_install_dir, ignore_errors=True)
            os.mkdir(panda_install_dir)
            for built in ('lib', 'bin', 'include', 'direct', 'pandac', 'models', 'etc'):
                subprocess.call('cp -R built/%s %s' % (built, panda_install_dir), shell=True)
        finally:
            safe_remove(panda3d_tgz)
            safe_remove(panda3d_archive_name)
            os.chdir(cwd)

    if not in_cache():
        raise RequirementException('can\'t find panda3d-%s in robustus cache' % requirement_specifier.version)

    # install panda3d to virtualenv
    libdir = os.path.join(robustus.env, 'lib/panda3d')
    shutil.rmtree(libdir, ignore_errors=True)
    os.mkdir(libdir)

    env_etcdir = os.path.join(robustus.env, 'etc')
    if not os.path.isdir(env_etcdir):
        os.mkdir(env_etcdir)
    etcdir = os.path.join(env_etcdir, 'panda3d')
    shutil.rmtree(etcdir, ignore_errors=True)
    os.mkdir(etcdir)

    copy_jobs = (
        ('lib/*', libdir),
        ('direct', libdir),
        ('pandac', libdir),
        ('etc/*', etcdir),
    )
    for source, destination in copy_jobs:
        run_shell('cp -r -p %s/%s %s/' % (panda_install_dir, source, destination), shell=True)

    # modify rpath of libs
    libdir = os.path.abspath(libdir)
    lib_pattern = '*.dylib' if sys.platform.startswith('darwin') else '*.so'
    for lib in glob.glob(os.path.join(libdir, lib_pattern)):
        fix_rpath(robustus, robustus.env, lib, libdir)

    # the .pth file adds libdir to the path and points PANDA_PRC_DIR at etcdir
    prc_dir_setup = "import os; os.environ['PANDA_PRC_DIR'] = '%s'" % etcdir
    write_file(os.path.join(robustus.env, 'lib/python2.7/site-packages/panda3d.pth'),
               'w',
               '%s\n%s\n' % (libdir, prc_dir_setup))

    # patch panda prc file
    extra_options = [
        "# enable antialiasing\n"
        "framebuffer-multisample 1\n"
        "multisamples 4\n",
        "# disable panda3d transform caching to avoid memory leak in bullet bindings\n"
        "garbage-collect-states 0\n",
        "# enable software rendering as fallback\n"
        "aux-display p3tinydisplay\n",
    ]
    with open(os.path.join(etcdir, 'Config.prc'), 'a') as f:
        f.write('\n'.join(extra_options))