def buildMusicList(self):
    """Visit the top-100 song URLs and extract the real song file addresses."""
    mList = codecs.open(self.currentPath + "/download.txt")
    lines = mList.readlines()
    downloadClass = DownloadHTMLParser()
    i = 1
    for line in lines:
        if i <= 100:
            request = urllib2.Request(line)
            listFile = urllib2.urlopen(request)
            downloadClass.feed(listFile.read())
        i += 1
    if downloadClass.mp3_128kbpsFiles:
        self.rowCount = len(downloadClass.mp3_128kbpsFiles)
        self.songAddresses = downloadClass.mp3_128kbpsFiles
        mp3UrlList = codecs.open(self.currentPath + "/musicFile.txt", "w+", "utf-8")
        for mp3File in downloadClass.mp3_128kbpsFiles:
            mp3UrlList.write(mp3File + "\r\n")
        mp3UrlList.close()
    else:
        pass
def download(
    url,
    gently=False,
    refresh=False,
    no_user_agent=False,
    expect_xml=False,
):
    if not os.path.isdir(_CACHE):
        os.mkdir(_CACHE)
    key = hashlib.md5(url).hexdigest()
    if no_user_agent:
        key += 'nouseragent'
    fp = os.path.join(_CACHE, key)
    if os.path.isfile(fp):
        age = time.time() - os.stat(fp).st_mtime
        if age > 60 * 60 * 24:
            os.remove(fp)
    if not os.path.isfile(fp) or refresh:
        print "* requesting", url
        r = realistic_request(url, no_user_agent=no_user_agent)
        if r.status_code == 200:
            if expect_xml and not (
                'xml' in r.headers['Content-Type'] or '<rss' in r.text
            ):
                raise NotXMLResponse(r.headers['Content-Type'])
            with codecs.open(fp, 'w', 'utf8') as f:
                f.write(r.text)
            if gently:
                time.sleep(random.randint(1, 4))
        else:
            raise Exception(r.status_code)
    with codecs.open(fp, 'r', 'utf8') as f:
        return f.read()
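# A minimal Python 3 sketch of the same on-disk cache pattern, assuming the
# third-party `requests` package; only the md5-of-URL cache key and the one-day
# expiry mirror the function above, everything else is illustrative.
import hashlib
import os
import time

import requests

_CACHE = ".cache"
_MAX_AGE = 60 * 60 * 24  # one day, as in the original


def cached_get(url, refresh=False):
    os.makedirs(_CACHE, exist_ok=True)
    fp = os.path.join(_CACHE, hashlib.md5(url.encode("utf-8")).hexdigest())
    # Expire stale cache entries.
    if os.path.isfile(fp) and time.time() - os.stat(fp).st_mtime > _MAX_AGE:
        os.remove(fp)
    if refresh or not os.path.isfile(fp):
        r = requests.get(url)
        r.raise_for_status()
        with open(fp, "w", encoding="utf-8") as f:
            f.write(r.text)
    with open(fp, "r", encoding="utf-8") as f:
        return f.read()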
def test_scanner_uses_sha256_by_default(self):
    dirname = self.create_sample_bag()
    # single-level
    output = codecs.open(os.path.join(dirname, 'default_checkm.txt'), encoding='utf-8', mode="w")
    output = self.reporter.create_checkm_file(scan_directory=dirname,
                                              checkm_filename='default_checkm.txt',
                                              checkm_file=output)
    input = open(os.path.join(dirname, 'default_checkm.txt'), 'r')
    lines = self.checkm_p.parse(input)
    for line in lines:
        if not (line[1] == 'dir'):
            self.assertEqual(line[1], 'sha256')
            break
    # multilevel
    output = codecs.open(os.path.join(dirname, 'default_checkm.txt'), encoding='utf-8', mode="w")
    output = self.reporter.create_multilevel_checkm(top_directory=dirname,
                                                    checkm_filename='default_checkm.txt')
    input = open(os.path.join(dirname, 'default_checkm.txt'), 'r')
    lines = self.checkm_p.parse(input)
    for line in lines:
        if not (line[1] == 'dir'):
            self.assertEqual(line[1], 'sha256')
            break
def send_file_content():
    import re
    import codecs
    import time
    file_pattern = r'E:\github\zhihu_spider\data\question_answer_content_2015_05_24.txt.partial_%s'
    # filename = os.path.join(PATH, 'data', 'question_answer_content_2015_05_24.txt.partial_10.html')
    # filename = r'E:\github\zhihu\text_data\zhihu_v500_l0-100_d20150228_r1272-1910.html'
    # file_content = codecs.open(filename).read()
    for file_index in range(1, 11):
        filename = file_pattern % file_index
        print file_index
        new_line_list = []
        with codecs.open(filename, encoding='utf-8') as f:
            index = 0
            for line in f.readlines():
                line = re.sub(r'\<div width="\d+"\>', '', line)
                question_subject_match = re.search(r'<table><tr><td><font color="#4EABF9"><u>(.*?)</u></font>', line)
                answer_list = re.findall(r'A:.*?<br>', line)
                if not question_subject_match:
                    print line.strip()
                    continue
                for answer in answer_list:
                    if not answer.replace('A:', '').strip():
                        print line.strip()
                        continue
                index += 1
                # Displayed as a web page, so literal "\n" characters are not a concern.
                new_line = question_subject_match.group().strip().replace('<u>', '<u>%s' % index) + "\n<br>" + "\n".join(answer_list) + '</tr></td></table>'
                new_line_list.append(new_line)
        file_content = '\n'.join(new_line_list)
        send_to_163_mail(file_content, mail_to)
        time.sleep(3)
        codecs.open(filename + '.html', mode='wb', encoding='utf-8').write('<html><head><meta charset=\'utf-8\'></head><body>' + '\n'.join(new_line_list) + '</body></html>')
def write_syn_dataset(csvPathname, rowCount, colCount, SEED):
    r1 = random.Random(SEED)
    if UTF8 or UTF8_MULTIBYTE:
        dsf = codecs.open(csvPathname, encoding='utf-8', mode='w+')
    elif UTF16:
        dsf = codecs.open(csvPathname, encoding='utf-16', mode='w+')
    else:
        dsf = open(csvPathname, "w+")

    for i in range(rowCount):
        if UTF16:
            u = unichr(0x2018) + unichr(6000) + unichr(0x2019)
            rowDataCsv = u
        else:
            # both ascii and utf-8 go here?
            rowData = []
            for j in range(colCount):
                r = generate_random_utf8_string(length=2)
                rowData.append(r)
            rowDataCsv = ",".join(rowData)

        if UTF16:
            # we're already passing it unicode. no decoding needed
            print "utf16:", repr(rowDataCsv), type(rowDataCsv)
            decoded = rowDataCsv
        else:
            print "str:", repr(rowDataCsv), type(rowDataCsv)
            # this has the right length..multibyte utf8 are decoded
            decoded = rowDataCsv.decode('utf-8')
            print "utf8:", repr(decoded), type(decoded)

        # dsf.write(rowDataCsv + "\n")
        dsf.write(decoded + "\n")
    dsf.close()
def _run(self, *cmdargs):
    cmdargs = [str(x) for x in cmdargs]
    p1 = self.tmpdir.join("stdout")
    p2 = self.tmpdir.join("stderr")
    print_("running:", ' '.join(cmdargs))
    print_(" in:", str(py.path.local()))
    f1 = codecs.open(str(p1), "w", encoding="utf8")
    f2 = codecs.open(str(p2), "w", encoding="utf8")
    try:
        now = time.time()
        popen = self.popen(cmdargs, stdout=f1, stderr=f2,
                           close_fds=(sys.platform != "win32"))
        ret = popen.wait()
    finally:
        f1.close()
        f2.close()
    f1 = codecs.open(str(p1), "r", encoding="utf8")
    f2 = codecs.open(str(p2), "r", encoding="utf8")
    try:
        out = f1.read().splitlines()
        err = f2.read().splitlines()
    finally:
        f1.close()
        f2.close()
    self._dump_lines(out, sys.stdout)
    self._dump_lines(err, sys.stderr)
    return RunResult(ret, out, err, time.time() - now)
def compile_html(self, source, dest):
    """Compile reSt into HTML."""
    if not has_docutils:
        raise Exception('To build this site, you need to install the '
                        '"docutils" package.')
    try:
        os.makedirs(os.path.dirname(dest))
    except:
        pass
    error_level = 100
    with codecs.open(dest, "w+", "utf8") as out_file:
        with codecs.open(source, "r", "utf8") as in_file:
            data = in_file.read()
            output, error_level, deps = rst2html(
                data, settings_overrides={
                    'initial_header_level': 2,
                    'record_dependencies': True,
                    'stylesheet_path': None,
                    'link_stylesheet': True,
                    'syntax_highlight': 'short',
                })
            out_file.write(output)
    deps_path = dest + '.dep'
    if deps.list:
        with codecs.open(deps_path, "wb+", "utf8") as deps_file:
            deps_file.write('\n'.join(deps.list))
    else:
        if os.path.isfile(deps_path):
            os.unlink(deps_path)
    if error_level < 3:
        return True
    else:
        return False
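# For reference, a minimal standalone docutils call (not the rst2html wrapper
# used above, which returns an error level and dependency list as well);
# publish_parts is part of the public docutils API. A sketch for illustration.
from docutils.core import publish_parts


def rst_body_to_html(text):
    # Returns only the rendered body, without the full HTML document wrapper.
    return publish_parts(text, writer_name='html')['html_body']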
def process_slides():
    with codecs.open('presentation.html', 'w', encoding='utf8') as outfile:
        md = codecs.open('slides.md', encoding='utf8').read()
        md_slides = md.split('\n---\n')
        print 'Compiled %s slides.' % len(md_slides)

        slides = []
        # Process each slide separately.
        for md_slide in md_slides:
            slide = {}
            sections = md_slide.split('\n\n')
            # Extract metadata at the beginning of the slide
            # (look for key: value pairs).
            metadata_section = sections[0]
            metadata = parse_metadata(metadata_section)
            slide.update(metadata)
            remainder_index = metadata and 1 or 0
            # Get the content from the rest of the slide.
            content_section = '\n\n'.join(sections[remainder_index:])
            html = markdown.markdown(content_section)
            slide['content'] = postprocess_html(html, metadata)
            slides.append(slide)

        template = jinja2.Template(open('_base.html').read())
        outfile.write(template.render(locals()))
def cdrom():
    # now parse the cd-rom raw texts as we got in corrupt TEI-XML from the INL
    for textN in os.listdir("../data/original/unannotated/cdrom_xml/"):
        if not textN.endswith(".xml"):
            continue
        print textN
        with open("../data/original/unannotated/cdrom_xml/" + textN) as oldF:
            try:
                text = text_tag.split(oldF.read(), maxsplit=1)[1]
                soup = Soup(text)
                text = soup.get_text()
                text = clean_text(text)
                if not text.startswith("voor de tekst zie"):
                    with codecs.open("../data/uniform/unannotated/cdrom/" + str(textN) + ".txt", "w+", "utf-8") as newF:
                        newF.write(text)
            except:
                pass
    # now parse the cd-rom raw texts from Brill (which we didn't get via the INL)
    # in the format as Lisanne downloaded them from the Cd-rom
    for textN in os.listdir("../data/original/unannotated/cdrom_txt/"):
        if not textN.endswith(".txt"):
            continue
        print textN
        with codecs.open("../data/original/unannotated/cdrom_txt/" + textN, "r+", "utf-8-sig") as oldF:
            words = [clean_token(w) for w in oldF.read().strip().split()]
        with codecs.open("../data/uniform/unannotated/cdrom/" + textN, "w+", "utf-8-sig") as newF:
            newF.write(" ".join(words))
    return
def __call__(self, file, config):
    """ Compare expected output to actual output and report result. """
    cfg_section = get_section(file, config)
    if config.get(cfg_section, 'skip'):
        raise nose.plugins.skip.SkipTest, 'Test skipped per config.'
    input_file = file + config.get(cfg_section, 'input_ext')
    with codecs.open(input_file, encoding="utf-8") as f:
        input = f.read()
    output_file = file + config.get(cfg_section, 'output_ext')
    with codecs.open(output_file, encoding="utf-8") as f:
        expected_output = f.read()
    output = markdown.markdown(input, **get_args(file, config))
    if tidy and config.get(cfg_section, 'normalize'):
        # Normalize whitespace before comparing.
        expected_output = normalize(expected_output)
        output = normalize(output)
    elif config.get(cfg_section, 'normalize'):
        # Tidy is not available. Skip this test.
        raise nose.plugins.skip.SkipTest, 'Test skipped. Tidy not available in system.'
    diff = [l for l in difflib.unified_diff(expected_output.splitlines(True),
                                            output.splitlines(True),
                                            output_file,
                                            'actual_output.html',
                                            n=3)]
    if diff:
        raise util.MarkdownSyntaxError('Output from "%s" failed to match expected '
                                       'output.\n\n%s' % (input_file, ''.join(diff)))
def split_linkedin_dump():
    skip = 2100000
    count = 0
    log = codecs.open("C:\\data\\log" + str(skip) + ".txt", 'w', encoding="utf-8")
    id_map = codecs.open("C:\\data\\idmap" + str(skip) + ".txt", 'w', encoding="utf-8")
    linkedin_dump = codecs.open('D:\\result.csv', encoding="utf-8")
    out = ""
    linkedin_dump.next()
    for line in linkedin_dump:
        x = 0
        if count < skip:
            count += 1
            if count % 10000 == 0:
                print count
            continue
        print str(count) + ':' + str(len(line))
        log.write(str(count) + ' ' + str(len(line)))
        if line[0] == '"':
            x = line.find('",')
            log.write(str(count) + ' ' + line[1:x] + '\n')
            verbose.debug(str(count) + ' ' + line[1:x])
            id_map.write(str(count) + ' ' + line[1:x] + '\n')
            count += 1
            try:
                out = codecs.open("C:\\data\\linkedin\\" + line[1:x].strip().replace('"', " ").split('?')[0], 'w', encoding="utf-8")
            except Exception, e:
                print e
        else:
            log.write("[EXCEPTION]" + str(count) + ":" + line + '\n')
        out.write(line[x:])
def manual_convert():
    root = upath.abspath(sys.argv[1])
    if len(sys.argv) == 3 and sys.argv[2] == '--convert':
        dry_run = False
    else:
        dry_run = True
    log = codecs.open('../convert.log', 'w', 'utf-8')
    for curdir, subdirs, filenames in os.walk(root, topdown=True):
        for name in filenames:
            try:
                src = os.path.join(curdir, name)
                print('Converting %s' % src)
                with open(src, 'rb') as f:
                    data = f.read()
                dst = src
                if dry_run:
                    dst = os.path.join(curdir, '_%s' % name)
                else:
                    dst = src
                utf8_data = compat.to_text(data, encoding='gb18030')
                with codecs.open(dst, 'w', 'utf-8') as f:
                    f.write(utf8_data)
            except Exception as e:
                traceback.print_exc()
                log.write(src)
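# A minimal sketch of the conversion core without the project-specific
# compat.to_text helper used above: decode the raw bytes as GB18030 and
# rewrite them as UTF-8 (error handling omitted; the function name is illustrative).
import codecs


def gb18030_to_utf8(src, dst):
    with open(src, 'rb') as f:
        data = f.read()
    with codecs.open(dst, 'w', 'utf-8') as f:
        f.write(data.decode('gb18030'))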
def generate_theme_file(theme_file_path, dict_seq, new_theme_file_path):
    """Appends `dict_seq` to `new_theme_file_path`, converting it to plist format.

    :param theme_file_path: path to the theme file to read from
    :param dict_seq: list of dictionaries with color definitions
    :param new_theme_file_path: path to the created theme file
    """
    with codecs.open(theme_file_path, 'r', 'utf-8') as f:
        # convert dict objects to plist format
        tempate_to_write = (dict_to_plist(d) for d in dict_seq)
        # work around codecs.StreamReaderWriter.read inaccuracy
        f = StringIO.StringIO(f.read())
        # find the end of the colors definition
        end_pos = seek_until(f, '</array>')
        # text up to the insertion point
        f.seek(0)
        begin_text = f.read(end_pos)
        # new colors definition plus the end of the file
        f.seek(end_pos)
        end_text = '\n'.join(tempate_to_write) + f.read()
        new_theme_text = begin_text + end_text
    with codecs.open(new_theme_file_path, 'w', 'utf-8') as f:
        f.write(new_theme_text)
def _parse_hosts_inventory(self, inventory_path):
    """
    Read all the available hosts inventory information into one big list
    and parse it.
    """
    hosts_contents = []
    if os.path.isdir(inventory_path):
        self.log.debug("Inventory path {} is a dir. Looking for inventory files in that dir.".format(inventory_path))
        for fname in os.listdir(inventory_path):
            # Skip .git folder
            if fname == '.git':
                continue
            path = os.path.join(inventory_path, fname)
            if os.path.isdir(path):
                continue
            with codecs.open(path, 'r', encoding='utf8') as f:
                hosts_contents += f.readlines()
    else:
        self.log.debug("Inventory path {} is a file. Reading as inventory.".format(inventory_path))
        with codecs.open(inventory_path, 'r', encoding='utf8') as f:
            hosts_contents = f.readlines()

    # Parse inventory and apply it to the hosts
    hosts_parser = parser.HostsParser(hosts_contents)
    for hostname, key_values in hosts_parser.hosts.items():
        self.update_host(hostname, key_values)
def _translate_document(path):
    if path not in _translated_documents:
        with codecs.open(path, "r", "utf-8") as infile:
            with codecs.open(_compiled_path(path), "w", "utf-8") as outfile:
                _translated_documents[path] = DocumentTranslator(infile, outfile, path)
                _translated_documents[path].translate()
    return _translated_documents[path]
def copy_static_entry(source, targetdir, builder, context={},
                      exclude_matchers=(), level=0):
    """Copy a HTML builder static_path entry from source to targetdir.

    Handles all possible cases of files, directories and subdirectories.
    """
    if exclude_matchers:
        relpath = relative_path(builder.srcdir, source)
        for matcher in exclude_matchers:
            if matcher(relpath):
                return
    if path.isfile(source):
        target = path.join(targetdir, path.basename(source))
        if source.lower().endswith('_t') and builder.templates:
            # templated!
            fsrc = open(source, 'r', encoding='utf-8')
            fdst = open(target[:-2], 'w', encoding='utf-8')
            fdst.write(builder.templates.render_string(fsrc.read(), context))
            fsrc.close()
            fdst.close()
        else:
            copyfile(source, target)
    elif path.isdir(source):
        if level == 0:
            for entry in os.listdir(source):
                if entry.startswith('.'):
                    continue
                copy_static_entry(path.join(source, entry), targetdir,
                                  builder, context, level=1,
                                  exclude_matchers=exclude_matchers)
        else:
            target = path.join(targetdir, path.basename(source))
            if path.exists(target):
                shutil.rmtree(target)
            shutil.copytree(source, target)
def tf_idf_predict_v(dicts, filename, filename1, filename2):
    t = CHI.dict_df(filename)
    mid_dict = dict()
    file1 = codecs.open(filename1, 'r', 'utf-8')
    readlists = file1.readlines()
    l = len(readlists)
    print(len(dicts))
    for i in range(l):
        texts = readlists[i].strip().split(' ')
        data = [0 for i in range(len(dicts))]
        l_word = len(texts)
        for text in texts:
            if text in mid_dict:
                mid_dict[text] += 1
            else:
                mid_dict[text] = 1
        for k, x in mid_dict.items():
            if k in dicts:
                tf = float(x) / l_word
                idf = math.log(8000 / float(t[k]))
                # print(dicts[k])
                data[int(dicts[k]) - 1] = tf * idf
        l_word = 0
        fileHandle = codecs.open(filename2, 'a', "utf-8")
        for k, x in enumerate(data):
            fileHandle.write(str(k) + ':' + str(x) + ' ')
        fileHandle.write('\n')
        fileHandle.close()
def _add_license(root, license_path="include-license.txt", whitelist=[]):
    """ Read a license from license_path and append it to all files under
    root whose extension is in _license_exts.
    """
    if not os.path.isfile(license_path):
        return
    lfile = codecs.open(license_path, "r", encoding="utf_8_sig")
    license = lfile.read()
    lfile.close()
    license_files = []
    for base, dirs, files in os.walk(root):
        if whitelist:
            bl = [d for d in dirs if not d in whitelist]
            while bl:
                dirs.pop(dirs.index(bl.pop()))
        license_files.extend([os.path.join(base, f) for f in files
                              if f.endswith(_license_exts)])
    for f in license_files:
        source = codecs.open(f, "r", encoding="utf_8_sig")
        tmpfd, tmppath = tempfile.mkstemp(".tmp", "dfbuild.")
        tmpfile = os.fdopen(tmpfd, "w")
        wrapped = codecs.getwriter("utf_8_sig")(tmpfile)
        wrapped.write(license)
        wrapped.write("\n")
        wrapped.write(source.read())
        source.close()
        tmpfile.close()
        shutil.copy(tmppath, f)
        os.unlink(tmppath)
def _ansi2utf8(path):
    f = codecs.open(path, "r", "utf-8")
    c = f.read()
    f.close()
    f = codecs.open(path, "w", "utf_8_sig")
    f.write(c)
    f.close()
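# A small self-contained check that the rewrite above produced a UTF-8 BOM,
# which is what the "utf_8_sig" codec adds on write (illustrative sketch;
# the function name is not part of the original code).
import codecs


def has_utf8_bom(path):
    with open(path, "rb") as f:
        return f.read(3) == codecs.BOM_UTF8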
def load_json(self, name, config_old, save=False):
    config_new = config_old
    path = './res_mods/configs/spoter_mods/%s/' % self.name
    if not os.path.exists(path):
        os.makedirs(path)
    new_path = '%s%s.json' % (path, name)
    if save:
        with codecs.open(new_path, 'w', encoding='utf-8-sig') as json_file:
            data = json.dumps(config_old, sort_keys=True, indent=4, ensure_ascii=False,
                              encoding='utf-8-sig', separators=(',', ': '))
            json_file.write('%s' % self.byte_ify(data))
            json_file.close()
        config_new = config_old
    elif os.path.isfile(new_path):
        try:
            with codecs.open(new_path, 'r', encoding='utf-8-sig') as json_file:
                data = self.json_comments(json_file.read().decode('utf-8-sig'))
                config_new = self.byte_ify(json.loads(data))
                json_file.close()
        except Exception as e:
            self.sys_mess()
            print '%s%s' % (self.sys_mes['ERROR'], e)
    else:
        self.sys_mess()
        print '%s[%s, %s %s]' % (self.sys_mes['ERROR'], self.code_pa(self.description), self.version, self.sys_mes['MSG_RECREATE_CONFIG'])
        with codecs.open(new_path, 'w', encoding='utf-8-sig') as json_file:
            data = json.dumps(config_old, sort_keys=True, indent=4, ensure_ascii=False,
                              encoding='utf-8-sig', separators=(',', ': '))
            json_file.write('%s' % self.byte_ify(data))
            json_file.close()
        config_new = config_old
        print '%s[%s, %s %s]' % (self.sys_mes['INFO'], self.code_pa(self.description), self.version, self.sys_mes['MSG_RECREATE_CONFIG_DONE'])
    return config_new
def _readTrainFile(self, inputFile, outputSeg, outputPos, tagNum):
    outSeg = codecs.open(outputSeg, 'w', self._textEncoding)
    outPos = codecs.open(outputPos, 'w', self._textEncoding)
    with codecs.open(inputFile, 'r', self._textEncoding) as inFile:
        for line in inFile:
            ret = line.strip().split()
            if not ret:
                continue
            for item in ret[1:]:
                if not item:
                    continue
                index1 = item.find(u'[')
                if index1 >= 0:
                    item = item[index1+1:]
                index2 = item.find(u']')
                if index2 > 0:
                    item = item[:index2]
                word, tag = item.split(u'/')
                if tag == 'w' and word in [u'。', u',']:
                    outSeg.write('\n')
                    outPos.write('\n')
                    continue
                outPos.write('%s %s\n' % (word, tag))
                if word:
                    if tagNum == 4:
                        self._write4Tag(word, outSeg)
                    elif tagNum == 6:
                        self._write6Tag(word, outSeg)
            outSeg.write('\n')
            outPos.write('\n')
    outSeg.close()
    outPos.close()
def GetLatestNews(input_file):
    # latest_news = []
    # if os.path.exists(input_file):
    #     lines = GetLastLines(input_file)
    #     for line in lines:
    #         latest_news.append(line.split('\t')[1])
    # return latest_news
    latest_news = {}
    if os.path.exists(input_file):
        input = codecs.open(input_file, encoding='utf-8')
        lines = input.readlines()
        for line in lines:
            latest_news[line.split('\t')[1]] = 0
        input.close()
    folder = input_file.split('/')[0]
    date = input_file.split('/')[1].split('.')[0]
    today = datetime.datetime.strptime(date, '%Y-%m-%d').date()
    yesterday = str(today - datetime.timedelta(days=1))
    input_file = folder + '/' + yesterday + '.txt'
    if os.path.exists(input_file):
        input = codecs.open(input_file, encoding='utf-8')
        lines = input.readlines()
        for line in lines:
            latest_news[line.split('\t')[1]] = 0
        input.close()
    return latest_news
def run(self, input, output, tokenizer=False, pathBioModel=None):
    if pathBioModel != None:
        assert os.path.exists(pathBioModel), pathBioModel
    if tokenizer:
        print >> sys.stderr, "Running BLLIP parser with tokenization"
        firstStageArgs = ["first-stage/PARSE/parseIt", "-l999", "-N50"]
    else:
        print >> sys.stderr, "Running BLLIP parser without tokenization"
        firstStageArgs = ["first-stage/PARSE/parseIt", "-l999", "-N50", "-K"]
    secondStageArgs = ["second-stage/programs/features/best-parses", "-l"]
    if pathBioModel != None:
        firstStageArgs += [pathBioModel + "/parser/"]
        secondStageArgs += [pathBioModel + "/reranker/features.gz",
                            pathBioModel + "/reranker/weights.gz"]
    else:
        firstStageArgs += ["first-stage/DATA/EN/"]
        secondStageArgs += ["second-stage/models/ec50spfinal/features.gz",
                            "second-stage/models/ec50spfinal/cvlm-l1c10P1-weights.gz"]
    print >> sys.stderr, "1st Stage arguments:", firstStageArgs
    print >> sys.stderr, "2nd Stage arguments:", secondStageArgs
    firstStage = subprocess.Popen(firstStageArgs,
                                  stdin=codecs.open(input, "rt", "utf-8"),
                                  stdout=subprocess.PIPE)
    secondStage = subprocess.Popen(secondStageArgs,
                                   stdin=firstStage.stdout,
                                   stdout=codecs.open(output, "wt", "utf-8"))
    return ProcessUtils.ProcessWrapper([firstStage, secondStage])
def mksnt(voc_f, txt_f, ext=True, verbose=0):
    voc = open(voc_f, 'r', encoding='utf-8')
    data = []
    dic = {}
    n_snt = 0
    n_wrd = 0
    for ii, txt in enumerate(open(txt_f, 'r', encoding='utf-8')):
        n_snt += 1
        words = txt.strip().split()
        for word in words:
            n_wrd += 1
            dic[word] = 0
        data.append(words)
    if verbose:
        sys.stderr.write(repr(n_snt) + ' sents, ' + repr(n_wrd) + ' words, ' + repr(len(dic)) + ' vocab\n')
    n_voc = 0
    max_voc = 0
    for ii, txt in enumerate(open(voc_f, 'r', encoding='utf-8')):
        n_voc += 1
        a, w = txt.split()[:2]
        a = int(a)
        if a > max_voc:
            max_voc = a
        v = dic.get(w)
        if v is None:
            continue
        if v == 0:
            dic[w] = int(a)
            continue
        if verbose > 1:
            sys.stderr.write('collision: dic ' + repr(v) + ' vs. voc ' + repr(a) + "\n")
    if verbose:
        sys.stderr.write('vsz = ' + repr(n_voc) + "\n")
    oov = set()
    i_ext = max_voc + 1
    for w in dic:
        if dic[w] == 0:
            dic[w] = i_ext
            i_ext += 1
            oov.add(w)
    for words in data:
        for word in words:
            v = dic.get(word)
            print v,
        print
    if verbose:
        sys.stderr.write('oov = ' + repr(len(oov)) + "\n")
    if ext:
        for w in oov:
            sys.stderr.write(repr(dic[w]) + ' ' + w.encode('utf-8') + ' 1\n')
    return 0
def main():
    if len(argv) < 3:
        exit('usage: %s quran_file trans_file' % argv[0])
    charset = 'utf-8'
    quran_file = argv[1]

    # prepare surah numbers to be split
    surah_numbers = set()

    # read quran translation file and split each surah in a list
    surah = []
    description = []
    surah_trans = {}
    trans_lines = open(quran_file, 'U', charset).read().split('\n')
    current = 1
    for line in trans_lines:
        # line = str(line).strip().replace('\xef\xbb\xbf', '')
        if line == '' or line.startswith('#'):
            description.append(line)
            continue
        parts = line.rpartition('|')
        surah.append(parts[2])

    # dest = ''.join([quran_file, ".trans"])
    dest = argv[2]
    open(dest, 'w', charset).writelines(linesep.join(surah))
def wrap_encrypt(path, password):
    """Wrap a post with encryption."""
    with codecs.open(path, 'rb+', 'utf8') as inf:
        data = inf.read() + "<!--tail-->"
    data = CRYPT.substitute(data=rc4(password, data))
    with codecs.open(path, 'wb+', 'utf8') as outf:
        outf.write(data)
def chg_sys_time(request):
    current_time = time.strftime('%Y-%m-%d %H:%M:%S')
    d = time.strftime('{"Y": "%Y", "M": "%m", "D": "%d", "H": "%H", "I": "%M", "S": "%S", "W": "%w"}')
    if request.POST:
        flag = request.POST.get('flag', None)
        if not flag:
            tname = request.POST['name_who']
            tchoice = request.POST['date_choice']
            tcomment = request.POST['comment']
            if com_date(tchoice, current_time):
                s = subprocess.call("chgtime 'date -d \"{0}\"' >>/mnt/lisp/djchgtiem/templates/data/djchgtiem.log 2>&1".format(tchoice), shell=True)
                if not s.real:
                    # "{0} switched the time to {2} at {1}; reason: {3}"
                    logline = u"{0} 在{1} 把时间切到了{2},理由是:{3}".format(tname, current_time, tchoice, tcomment)
                    base_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
                    with codecs.open(base_dir + '/templates/data/record.log', 'r', 'utf-8') as original:
                        data = original.read()
                    with codecs.open(base_dir + '/templates/data/record.log', 'w', 'utf-8') as fw:
                        fw.write("<p>" + logline + "</p>\n")
                        fw.write(data)
                    return HttpResponse("<p>" + logline + "</p>")
                else:
                    # "Date switch failed! Please contact the administrator~"
                    return HttpResponse(u"<p> 日切失败!请联系管理员~ </p>")
            else:
                # "The time you selected is earlier than the current time!"
                return HttpResponse(u"<p> 您选择的时间小于当前时间! </p>")
        else:
            return HttpResponse(d)
def cross_texts(pText1, pText2, pOutput):
    FILE_TEXT1 = pText1
    FILE_TEXT2 = pText2
    FILE_OUTPUT = pOutput
    file1 = codecs.open(FILE_TEXT1, "r", "utf-8")
    file2 = codecs.open(FILE_TEXT2, "r", "utf-8")
    file3 = codecs.open(FILE_OUTPUT, "wb", "utf-8")
    content_file1 = file1.readlines()
    content_file2 = file2.readlines()
    # exclude the first seven elements because they do not contribute anything
    list1 = remove_garbage(7, content_file1)
    list2 = remove_garbage(7, content_file2)
    # Cross each term from text1 with all terms from text2
    for x in list1:
        for y in list2:
            file3.write(x + ": \n".encode("utf-8"))
            file3.write(y + ": \n".encode("utf-8"))
            file3.write(x + "_" + y + ":\n".encode("utf-8"))
    file1.close()
    file2.close()
    file3.close()
def file_changed(self, path):
    response = False
    SAVE = u"名前をつけて保存"      # "Save as"
    RELOAD = u"ファイルを再読込"    # "Reload file"
    CANCEL = u"キャンセル"          # "Cancel"
    message = QtGui.QMessageBox(self)
    message.setText(u"ファイルは変更されています")  # "The file has been modified"
    message.setWindowTitle("Notepad")
    message.setIcon(QtGui.QMessageBox.Warning)
    message.addButton(SAVE, QtGui.QMessageBox.AcceptRole)
    message.addButton(RELOAD, QtGui.QMessageBox.DestructiveRole)
    message.addButton(CANCEL, QtGui.QMessageBox.RejectRole)
    # "... was modified or deleted by another application. What do you want to do?"
    message.setDetailedText(str(path) + u" は、他のアプリケーションで内容を変更されたか削除されました。どうしますか?")
    message.exec_()
    response = message.clickedButton().text()
    if response == SAVE:
        fd = QtGui.QFileDialog(self)
        newfile = fd.getSaveFileName()
        if newfile:
            s = codecs.open(newfile, "w", "utf-8")
            s.write(unicode(self.ui.editor_window.toPlainText()))
            s.close()
            self.ui.button_save.setEnabled(False)
            if self.filename and str(newfile) != str(self.filename):
                self.watcher.removePath(self.filename)
                self.watcher.addPath(self.filename)
                self.filename = newfile
    elif response == RELOAD:
        s = codecs.open(self.filename, "r", "utf-8").read()
        self.ui.editor_window.setPlainText(s)
        self.ui.button_save.setEnabled(False)
def prepare(self):
    try:
        cssFile = codecs.open(self._cssFilename, "r", "utf-8")
    except IOError:
        self._outLogFunc("** Warning: Could not open stylesheet file; the output HTML will be ugly.")
        cssContent = ""
    else:
        cssContent = cssFile.read()
        cssFile.close()

    self._outfile = codecs.open(self._filename + ".html", "w", "utf-8")
    self._outfile.write('<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">\n')
    self._outfile.write('<html xmlns="http://www.w3.org/1999/xhtml">\n')
    self._outfile.write('\t<head>\n')
    self._outfile.write('\t\t<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />\n')
    self._outfile.write((b'\t\t<title>' + self._displayName.encode("utf-8") + b'\'s plurk Backup</title>\n').decode("utf-8"))
    self._outfile.write('\t\t<style type="text/css">\n')
    self._outfile.write(cssContent)
    self._outfile.write('\t\t</style>\n')
    self._outfile.write('\t</head>\n')
    self._outfile.write('\t<body>\n')
    self._outfile.write((b'\t\t<h1>' + self._displayName.encode("utf-8") + b'\'s plurk Backup</h1>\n').decode("utf-8"))
    self._outfile.write('\t\t<p class="smallnote">\n')
    self._outfile.write('\t\t\tClick on a plurk\'s timestamp to go to its page on plurk.com .\n')
    self._outfile.write('\t\t</p>\n')
def process_config(config_path, py3_wrapper=None): """ Parse i3status.conf so we can adapt our code to the i3status config. """ def notify_user(error): if py3_wrapper: py3_wrapper.notify_user(error) else: print(error) def parse_config(config): """ Parse text or file as a py3status config file. """ if hasattr(config, "readlines"): config = "".join(config.readlines()) parser = ConfigParser(config, py3_wrapper) parser.parse() parsed = parser.config del parser return parsed def parse_config_error(e, config_path): # There was a problem use our special error config error = e.one_line(config_path) notify_user(error) # to display correctly in i3bar we need to do some substitutions for char in ['"', "{", "|"]: error = error.replace(char, "\\" + char) error_config = Template(ERROR_CONFIG).substitute(error=error) return parse_config(error_config) config = {} # get the file encoding this is important with multi-byte unicode chars try: encoding = check_output( ["file", "-b", "--mime-encoding", "--dereference", config_path]) encoding = encoding.strip().decode("utf-8") except CalledProcessError: # bsd does not have the --mime-encoding so assume utf-8 encoding = "utf-8" try: with codecs.open(config_path, "r", encoding) as f: try: config_info = parse_config(f) except ParseException as e: config_info = parse_config_error(e, config_path) except LookupError: with codecs.open(config_path) as f: try: config_info = parse_config(f) except ParseException as e: config_info = parse_config_error(e, config_path) # update general section with defaults general_defaults = GENERAL_DEFAULTS.copy() if "general" in config_info: general_defaults.update(config_info["general"]) config["general"] = general_defaults config["py3status"] = config_info.get("py3status", {}) modules = {} on_click = {} i3s_modules = [] py3_modules = [] module_groups = {} def process_onclick(key, value, group_name): """ Check on_click events are valid. Store if they are good """ button_error = False button = "" try: button = key.split()[1] if int(button) not in range(1, 20): button_error = True except (ValueError, IndexError): button_error = True if button_error: err = "Invalid on_click for `{}`. Number not in range 1-20: `{}`." notify_user(err.format(group_name, button)) return False clicks = on_click.setdefault(group_name, {}) clicks[button] = value return True def get_module_type(name): """ i3status or py3status? """ if name.split()[0] in I3S_MODULE_NAMES: return "i3status" return "py3status" def process_module(name, module, parent): if parent: modules[parent]["items"].append(name) mg = module_groups.setdefault(name, []) mg.append(parent) if get_module_type(name) == "py3status": module[".group"] = parent # check module content for k, v in list(module.items()): if k.startswith("on_click"): # on_click event process_onclick(k, v, name) # on_click should not be passed to the module via the config. del module[k] if isinstance(v, ModuleDefinition): # we are a container module["items"] = [] return module def get_modules(data, parent=None): for k, v in data.items(): if isinstance(v, ModuleDefinition): module = process_module(k, v, parent) modules[k] = module get_modules(v, parent=k) get_modules(config_info) config["order"] = [] def remove_any_contained_modules(module): """ takes a module definition and returns a dict without any modules that may be defined with it. 
""" fixed = {} for k, v in module.items(): if not isinstance(v, ModuleDefinition): fixed[k] = v return fixed def append_modules(item): module_type = get_module_type(item) if module_type == "i3status": if item not in i3s_modules: i3s_modules.append(item) else: if item not in py3_modules: py3_modules.append(item) def add_container_items(module_name): module = modules.get(module_name, {}) items = module.get("items", []) for item in items: if item in config: continue append_modules(item) module = modules.get(item, {}) config[item] = remove_any_contained_modules(module) # add any children add_container_items(item) # create config for modules in order for name in config_info.get("order", []): module_name = name.split(" ")[0] if module_name in RETIRED_MODULES: notify_user( "Module `{}` is no longer available".format(module_name) + ". Alternative modules are: {}.".format(", ".join( "`{}`".format(x) for x in RETIRED_MODULES[module_name]))) continue module = modules.get(name, {}) config["order"].append(name) add_container_items(name) append_modules(name) config[name] = remove_any_contained_modules(module) config["on_click"] = on_click config["i3s_modules"] = i3s_modules config["py3_modules"] = py3_modules config[".module_groups"] = module_groups # time and tztime modules need a format for correct processing for name in config: if name.split()[0] in TIME_MODULES and "format" not in config[name]: if name.split()[0] == "time": config[name]["format"] = TIME_FORMAT else: config[name]["format"] = TZTIME_FORMAT if not config["order"]: notify_user("Your configuration file does not list any module" ' to be loaded with the "order" directive.') return config
def _parse_in_more_detail_XML(self):
    """Parse unimod xml.

    Returns:
        list: list of dicts with information regarding a unimod
    """
    data_list = []
    for xml_path in self.unimod_xml_names:
        xml_path = Path(xml_path)
        if os.path.exists(xml_path) is False:
            logger.warning(f"{xml_path} does not exist")
            continue
        logger.info("Parsing mod xml file ({0})".format(xml_path))
        unimodXML = ET.iterparse(
            codecs.open(xml_path, "r", encoding="utf8"),
            events=(b"start", b"end"),
        )
        for event, element in unimodXML:
            if event == b"start":
                if element.tag.endswith("}mod"):
                    tmp = {
                        "Name": element.attrib["title"],
                        "Accession": str(element.attrib.get("record_id", "")),
                        "Description": element.attrib.get("full_name", ""),
                        "elements": {},
                        "specificity": [],
                        "PSI-MS approved": False,
                    }
                    if element.attrib.get("approved", "0") == "1":
                        tmp["PSI-MS approved"] = True
                        tmp["PSI-MS Name"] = element.attrib["title"]
                elif element.tag.endswith("}delta"):
                    tmp["mono_mass"] = float(element.attrib["mono_mass"])
                elif element.tag.endswith("}alt_name"):
                    tmp["Alt Description"] = element.text
                else:
                    pass
            else:
                # end mod
                if element.tag.endswith("}delta"):
                    tmp["elements"] = self._extract_elements(element)
                elif element.tag.endswith("}specificity"):
                    amino_acid = element.attrib["site"]
                    classification = element.attrib["classification"]
                    if classification == "Artefact":
                        continue
                    neutral_loss_elements = {}
                    neutral_loss_mass = 0
                    if len(element) > 0:
                        for sub_element in element.iter():
                            if (sub_element.tag.endswith("}NeutralLoss")
                                    and len(sub_element) > 0):
                                neutral_loss_elements = self._extract_elements(sub_element)
                                neutral_loss_mass = float(sub_element.attrib["mono_mass"])
                    tmp["specificity"].append(
                        f"{amino_acid}<|>{classification}<|>{neutral_loss_elements}<|>{neutral_loss_mass}"
                    )
                elif element.tag.endswith("}mod"):
                    data_list.append(tmp)
                else:
                    pass
    return data_list
table_list = ['nodes', 'nodes_tags', 'ways', 'ways_tags', 'ways_nodes']

con = sqlite3.connect("safety_harbor.db")
cur = con.cursor()

# drop tables if they exist so we do not insert repeat data
for tablename in table_list:
    stmt = "DROP TABLE IF EXISTS " + tablename
    cur.execute(stmt)
    con.commit()

# create nodes table
cur.execute("CREATE TABLE IF NOT EXISTS nodes (id, lat, lon, user, uid, version, changeset, timestamp);")

# load table
with codecs.open('nodes.csv', encoding='utf-8-sig') as fin:
    dr = csv.DictReader(fin)
    pprint.pprint(dr.fieldnames)
    to_db = [(i['id'], i['lat'], i['lon'], i['user'], i['uid'], i['version'], i['changeset'], i['timestamp']) for i in dr]

cur.executemany("INSERT INTO nodes (id, lat, lon, user, uid, version, changeset, timestamp) \
                 VALUES (?, ?, ?, ?, ?, ?, ?, ?);", to_db)

# create nodes_tags table
cur.execute("CREATE TABLE IF NOT EXISTS nodes_tags (id, key, value, type);")

# load table
with codecs.open('nodes_tags.csv', encoding='utf-8-sig') as fin:
    dr = csv.DictReader(fin)
    pprint.pprint(dr.fieldnames)
def writeaveragemaxminarray(file, average, maxsetspec, minsetspec): output = codecs.open(file, 'w') linenumber = 0 havekey = 0 if len(average) == 1: averagemax = (np.array(average[0])) * (1 + maxsetspec) averagemin = (np.array(average[0])) * (1 - minsetspec) havekey = 0 elif len(average) == 2: havekey = 1 averagemax = (np.array(average[0])) * (1 + maxsetspec) averagemin = (np.array(average[0])) * (1 - minsetspec) keymax = (np.array(average[1])) * (1 + maxsetspec) keymin = (np.array(average[1])) * (1 - minsetspec) '''print(averagemax) print(averagemin) print(keymax) print(keymin)''' for line in averagemax: output.write('CM_DELTA_MAX_ROW' + str("%02d" % linenumber) + ' ' + '=' + ' ') inumber = 0 for avdata in line: if avdata == 0: avdata = 5 else: pass if inumber == (len(line) - 1): output.write(str(int(avdata)) + '\n') else: output.write(str(int(avdata)) + ',' + ' ') inumber += 1 linenumber += 1 if havekey == 1: output.write('CM_DELTA_MAX_KEY' + ' ' + '=' + ' ') inumber = 0 for i in keymax: if i == 0: i = 5 else: pass if inumber == (len(keymax) - 1): output.write(str(int(i)) + '\n') else: output.write(str(int(i)) + ',' + ' ') inumber += 1 output.write('\n' + '\n' + '; cm delta min' + '\n') linenumber = 0 for line in averagemin: output.write('CM_DELTA_MIN_ROW' + str("%02d" % linenumber) + ' ' + '=' + ' ') inumber = 0 for i in line: if inumber == (len(line) - 1): output.write(str(int(i)) + '\n') else: output.write(str(int(i)) + ',' + ' ') inumber += 1 linenumber += 1 if havekey == 1: inumber = 0 output.write('CM_DELTA_MIN_KEY' + ' ' + '=' + ' ') for i in keymin: if i == 0: i = 5 else: pass if inumber == (len(keymin) - 1): output.write(str(int(i)) + '\n') else: output.write(str(int(i)) + ',' + ' ') inumber += 1 output.close()
def filedirectorycatchdata( filedirectory): #获取log csv文件数据 输出格式为[[all屏体数据][all按键数据]]数组 global L, usefuldatafile listfile = os.listdir(filedirectory) L = [ filename for filename in listfile if filename[-4:] == '.csv' and not filename.find('summary') != -1 ] print(' ' + '-' * 19 + '导入文件' + '-' * 20) alldata = [] allsampledata = [] allsamplekeydata = [] allsamplecptestdata = [] allsamplecpshortdata = [] nodatafile = [] usefuldatafile = [] for fileadr in L: try: #解决文件存在非法编码导致无法linecache问题 linecache.updatecache(filedirectory + fileadr) linefile = linecache.getlines(filedirectory + fileadr) except Exception: print(str(fileadr) + '该文件数据存在非法字符') newfile = codecs.open(filedirectory + fileadr, 'r', 'gbk', 'ignore') text = newfile.read() newfile.close() with codecs.open(filedirectory + fileadr, 'w') as newfile2: newfile2.write(text) linecache.updatecache(filedirectory + fileadr) linefile = linecache.getlines(filedirectory + fileadr) '''print(filedirectory+fileadr)''' linenumber = 0 starline = 0 endline = 0 sampledata = [] keyarray = [] cpteststartline = 0 cptestendline = 0 cpshortstartline = 0 cpshortendline = 0 sampledata = [] keyarray = [] samplecpselfdata = [] samplecpshortdata = [] for line in linefile: linenumber += 1 if line.find('CMDelta Test Start') != -1: starline = linenumber if line.find('CMDelta Test End') != -1: endline = linenumber if line.find('Self Cp Test Start') != -1: #加入Self CP test cpteststartline = linenumber if line.find('Self Cp Test End') != -1: cptestendline = linenumber if line.find('CP_SHORT Test Start') != -1: #加入CP Short test cpshortstartline = linenumber #print(cpshortstartline) if line.find('CP_SHORT Test End') != -1: cpshortendline = linenumber datanumber = 0 if starline != 0 and endline != 0: dataline = linefile[starline:endline] for data in dataline: if data.find('[Row00]') != -1: datastar = datanumber if data.find('CM Delta Key') != -1: dataend = datanumber datanumber += 1 keydata = dataline[dataend:endline] del keydata[0] del keydata[-1] keyarray = [] for k in keydata: if k == '\n': pass else: keyread = k.split(',') keyrealdata = keyread[:-1] for i in keyrealdata: if i == '' or i == '\n': pass else: newkey = (((((i.replace('[', '')).replace( ']', '')).replace('{', '')).replace( '}', '')).replace('\n', '')).replace('\t', '') keyarray.append(int(newkey)) data = dataline[datastar:dataend - 1] for datare in data: if datare == '\n': pass else: dataread = datare.split(',') d = dataread[1:] slist = [] for s in d: if s == '' or s == '\n': pass else: news = (((((s.replace('[', '')).replace( ']', '')).replace('{', '')).replace( '}', '')).replace('\n', '')).replace('\t', '') slist.append(int(news)) if len(slist) != 0: sampledata.append(slist) usefuldatafile.append(str(fileadr)) else: nodatafile.append(str(fileadr)) if (len(sampledata) != 0): allsampledata.append(sampledata) if (len(keyarray) != 0): allsamplekeydata.append(keyarray) if cpteststartline != 0 and cptestendline != 0: #提取 Self CP 测试数据 #print('try to catch self cp data') selfcpdatanumber = 0 selfcpdataline = linefile[cpteststartline:cptestendline] for selfcpdata in selfcpdataline: if selfcpdata.find('Row00') != -1: selfdatastart = selfcpdatanumber if selfcpdata.find(' Self Cp Test End') != -1: selfdataend = selfcpdatanumber selfcpdatanumber += 1 selfcpdatafile = selfcpdataline[selfdatastart:selfdataend] for datare in selfcpdatafile: if datare == '\n': pass else: dataread = datare.split(',') d = dataread[1:] slist2 = [] for s in d: if s == '' or s == '\n': pass else: news = (((((s.replace('[', '')).replace( ']', 
'')).replace('{', '')).replace( '}', '')).replace('\n', '')).replace('\t', '') slist2.append(int(news)) if len(slist2) != 0: samplecpselfdata.append(slist2) if (len(samplecpselfdata) != 0): #print(samplecpselfdata) allsamplecptestdata.append(samplecpselfdata) if cpshortstartline != 0 and cpshortendline != 0: #提取 CP SHORT 测试数据 #print('try to catch SHORT data') selfshortnumber = 0 cpshortline = linefile[cpshortstartline:cpshortendline] #print(cpshortline) for cpshortdata in cpshortline: if cpshortdata.find('Row00') != -1: cpshortstart = selfshortnumber if cpshortdata.find(' CP_SHORT Test End') != -1: cpshortend = selfshortnumber selfshortnumber += 1 cpshortfile = cpshortline[cpshortstart:cpshortend] #print(cpshortfile) for datare in cpshortfile: if datare == '\n': pass else: dataread = datare.split(',') d = dataread[1:] slist3 = [] for s in d: if s == '' or s == '\n': pass else: news = (((((s.replace('[', '')).replace( ']', '')).replace('{', '')).replace( '}', '')).replace('\n', '')).replace('\t', '') slist3.append(int(news)) if len(slist3) != 0: samplecpshortdata.append(slist3) if (len(samplecpshortdata) != 0): #print(samplecpshortdata) allsamplecpshortdata.append(samplecpshortdata) print('*' * 19 + '数据不存在样品' + '*' * 19) print(nodatafile) print('\n') '''print('-'*19+'有效文件'+'-'*19) print(usefuldatafile)''' alldata.append(allsampledata) if (len(allsamplekeydata) != 0): alldata.append(allsamplekeydata) return alldata
def Maketestdataspec(inputfiledirectory, vatargetpercent, keytargetpercent, outputaveragefile, alldataoutputfile, outputmaxsetspec, outputminsetspec): def filedirectorycatchdata( filedirectory): #获取log csv文件数据 输出格式为[[all屏体数据][all按键数据]]数组 global L, usefuldatafile listfile = os.listdir(filedirectory) L = [ filename for filename in listfile if filename[-4:] == '.csv' and not filename.find('summary') != -1 ] print(' ' + '-' * 19 + '导入文件' + '-' * 20) alldata = [] allsampledata = [] allsamplekeydata = [] allsamplecptestdata = [] allsamplecpshortdata = [] nodatafile = [] usefuldatafile = [] for fileadr in L: try: #解决文件存在非法编码导致无法linecache问题 linecache.updatecache(filedirectory + fileadr) linefile = linecache.getlines(filedirectory + fileadr) except Exception: print(str(fileadr) + '该文件数据存在非法字符') newfile = codecs.open(filedirectory + fileadr, 'r', 'gbk', 'ignore') text = newfile.read() newfile.close() with codecs.open(filedirectory + fileadr, 'w') as newfile2: newfile2.write(text) linecache.updatecache(filedirectory + fileadr) linefile = linecache.getlines(filedirectory + fileadr) '''print(filedirectory+fileadr)''' linenumber = 0 starline = 0 endline = 0 sampledata = [] keyarray = [] cpteststartline = 0 cptestendline = 0 cpshortstartline = 0 cpshortendline = 0 sampledata = [] keyarray = [] samplecpselfdata = [] samplecpshortdata = [] for line in linefile: linenumber += 1 if line.find('CMDelta Test Start') != -1: starline = linenumber if line.find('CMDelta Test End') != -1: endline = linenumber if line.find('Self Cp Test Start') != -1: #加入Self CP test cpteststartline = linenumber if line.find('Self Cp Test End') != -1: cptestendline = linenumber if line.find('CP_SHORT Test Start') != -1: #加入CP Short test cpshortstartline = linenumber #print(cpshortstartline) if line.find('CP_SHORT Test End') != -1: cpshortendline = linenumber datanumber = 0 if starline != 0 and endline != 0: dataline = linefile[starline:endline] for data in dataline: if data.find('[Row00]') != -1: datastar = datanumber if data.find('CM Delta Key') != -1: dataend = datanumber datanumber += 1 keydata = dataline[dataend:endline] del keydata[0] del keydata[-1] keyarray = [] for k in keydata: if k == '\n': pass else: keyread = k.split(',') keyrealdata = keyread[:-1] for i in keyrealdata: if i == '' or i == '\n': pass else: newkey = (((((i.replace('[', '')).replace( ']', '')).replace('{', '')).replace( '}', '')).replace('\n', '')).replace('\t', '') keyarray.append(int(newkey)) data = dataline[datastar:dataend - 1] for datare in data: if datare == '\n': pass else: dataread = datare.split(',') d = dataread[1:] slist = [] for s in d: if s == '' or s == '\n': pass else: news = (((((s.replace('[', '')).replace( ']', '')).replace('{', '')).replace( '}', '')).replace('\n', '')).replace('\t', '') slist.append(int(news)) if len(slist) != 0: sampledata.append(slist) usefuldatafile.append(str(fileadr)) else: nodatafile.append(str(fileadr)) if (len(sampledata) != 0): allsampledata.append(sampledata) if (len(keyarray) != 0): allsamplekeydata.append(keyarray) if cpteststartline != 0 and cptestendline != 0: #提取 Self CP 测试数据 #print('try to catch self cp data') selfcpdatanumber = 0 selfcpdataline = linefile[cpteststartline:cptestendline] for selfcpdata in selfcpdataline: if selfcpdata.find('Row00') != -1: selfdatastart = selfcpdatanumber if selfcpdata.find(' Self Cp Test End') != -1: selfdataend = selfcpdatanumber selfcpdatanumber += 1 selfcpdatafile = selfcpdataline[selfdatastart:selfdataend] for datare in selfcpdatafile: if datare == '\n': pass else: 
dataread = datare.split(',') d = dataread[1:] slist2 = [] for s in d: if s == '' or s == '\n': pass else: news = (((((s.replace('[', '')).replace( ']', '')).replace('{', '')).replace( '}', '')).replace('\n', '')).replace('\t', '') slist2.append(int(news)) if len(slist2) != 0: samplecpselfdata.append(slist2) if (len(samplecpselfdata) != 0): #print(samplecpselfdata) allsamplecptestdata.append(samplecpselfdata) if cpshortstartline != 0 and cpshortendline != 0: #提取 CP SHORT 测试数据 #print('try to catch SHORT data') selfshortnumber = 0 cpshortline = linefile[cpshortstartline:cpshortendline] #print(cpshortline) for cpshortdata in cpshortline: if cpshortdata.find('Row00') != -1: cpshortstart = selfshortnumber if cpshortdata.find(' CP_SHORT Test End') != -1: cpshortend = selfshortnumber selfshortnumber += 1 cpshortfile = cpshortline[cpshortstart:cpshortend] #print(cpshortfile) for datare in cpshortfile: if datare == '\n': pass else: dataread = datare.split(',') d = dataread[1:] slist3 = [] for s in d: if s == '' or s == '\n': pass else: news = (((((s.replace('[', '')).replace( ']', '')).replace('{', '')).replace( '}', '')).replace('\n', '')).replace('\t', '') slist3.append(int(news)) if len(slist3) != 0: samplecpshortdata.append(slist3) if (len(samplecpshortdata) != 0): #print(samplecpshortdata) allsamplecpshortdata.append(samplecpshortdata) print('*' * 19 + '数据不存在样品' + '*' * 19) print(nodatafile) print('\n') '''print('-'*19+'有效文件'+'-'*19) print(usefuldatafile)''' alldata.append(allsampledata) if (len(allsamplekeydata) != 0): alldata.append(allsamplekeydata) return alldata def makespec(testsampledata, targetpercent): def makeaverage(sampledata2): b = (np.array(sampledata2[0])) * 0 for i in sampledata2: j = np.array(i) b = b + j average = b // (len(sampledata2)) return average havengsample = 1 ngfileadr = [] while havengsample == 1: print('-' * 19 + '判断良品中' + '-' * 19) print('数目:', len(testsampledata)) print('\n') sampleaverage = makeaverage(testsampledata) percentarray = [] diffvaluearray = [] for data in testsampledata: specvalue = abs(((np.array(data)) / sampleaverage) - 1) percentarray.append(specvalue) diffvalue = abs((np.array(data) - sampleaverage)) diffvaluearray.append(diffvalue) testsamplenumber = 0 samplenumber = 0 ngsamplenumber = [] havengsample = 0 percentarray = np.nan_to_num(percentarray) diffvaluearray = np.nan_to_num(diffvaluearray) for samplepercent in percentarray: maxpercent = np.max(samplepercent) if maxpercent >= targetpercent: singellinepercent = samplepercent.flatten( ) #样品数据从二维变为一维方便比较 singellinediff = ( diffvaluearray[testsamplenumber] ).flatten() #样品测试数值与average值的差值从二维变为一维方便比较 b = np.arange(len(singellinepercent)) c = b[ singellinepercent >= targetpercent] # c array 存放的是单个样品中大于targetpercent位置的索引 for i in range(len(c)): if singellinediff[c[i]] > 5: havengsample = 1 ngsamplenumber.append(testsamplenumber) del testsampledata[samplenumber] samplenumber -= 1 break testsamplenumber += 1 samplenumber += 1 if havengsample == 1: for ng in ngsamplenumber: ngfileadr.append(L[ng]) print('*' * 19 + 'VA区不良样品' + '*' * 19) print(ngfileadr) print('VA区不良样品总数:', len(ngfileadr)) print('\n') '''print(sampleaverage)''' return sampleaverage def makekeyspec(samplekeydata, targetpercent): def makekeyaverage(data): b = np.array(data[0]) * 0 for i in data: j = np.array(i) b = b + j average = b // len(data) return average havengsample = 1 ngfileadr = [] while havengsample == 1: print('-' * 19 + '判断按键良品中' + '-' * 19) print('数目:', len(samplekeydata)) samplekeyaverage = makekeyaverage(samplekeydata) 
percentarray = [] diffvaluearray = [] for data in samplekeydata: specvalue = abs((((np.array(data)) / samplekeyaverage) - 1)) percentarray.append(specvalue) diffvalue = abs((np.array(data)) - samplekeyaverage) diffvaluearray.append(diffvalue) testsamplenumber = 0 samplenumber = 0 ngsamplenumber = [] havengsample = 0 percentarray = np.nan_to_num(percentarray) diffvaluearray = np.nan_to_num(diffvaluearray) for samplepercent in percentarray: maxpercent = np.max(samplepercent) if maxpercent >= targetpercent: maxlocation = np.where( samplepercent == np.max(samplepercent)) maxdatanumbers = len(maxlocation) diffarray = [] while (maxdatanumbers >= 1): x = 0 row = maxlocation[x] diff = diffvaluearray[testsamplenumber][row] diffarray.append(diff) maxdatanumbers -= 1 x += 1 maxdiff = np.max(diffarray) if (maxdiff <= 5): samplenumber += 1 break else: havengsample = 1 ngsamplenumber.append(testsamplenumber) del samplekeydata[samplenumber] testsamplenumber += 1 else: samplenumber += 1 testsamplenumber += 1 if havengsample == 1: for ng in ngsamplenumber: ngfileadr.append(L[ng]) print('*' * 19 + '按键不良样品' + '*' * 19) print(ngfileadr) print('\n') return samplekeyaverage def writeaveragearray(file, average): output = codecs.open(file, 'w') linenumber = 0 for line in average: output.write('CM_DELTA_ROW' + str("%02d" % linenumber) + ' ' + '=' + ' ') inumber = 0 for i in line: if inumber == (len(line) - 1): output.write(str(i) + '\n') else: output.write(str(i) + ',' + ' ') inumber += 1 linenumber += 1 output.close() def writeaveragemaxminarray(file, average, maxsetspec, minsetspec): output = codecs.open(file, 'w') linenumber = 0 havekey = 0 if len(average) == 1: averagemax = (np.array(average[0])) * (1 + maxsetspec) averagemin = (np.array(average[0])) * (1 - minsetspec) havekey = 0 elif len(average) == 2: havekey = 1 averagemax = (np.array(average[0])) * (1 + maxsetspec) averagemin = (np.array(average[0])) * (1 - minsetspec) keymax = (np.array(average[1])) * (1 + maxsetspec) keymin = (np.array(average[1])) * (1 - minsetspec) '''print(averagemax) print(averagemin) print(keymax) print(keymin)''' for line in averagemax: output.write('CM_DELTA_MAX_ROW' + str("%02d" % linenumber) + ' ' + '=' + ' ') inumber = 0 for avdata in line: if avdata == 0: avdata = 5 else: pass if inumber == (len(line) - 1): output.write(str(int(avdata)) + '\n') else: output.write(str(int(avdata)) + ',' + ' ') inumber += 1 linenumber += 1 if havekey == 1: output.write('CM_DELTA_MAX_KEY' + ' ' + '=' + ' ') inumber = 0 for i in keymax: if i == 0: i = 5 else: pass if inumber == (len(keymax) - 1): output.write(str(int(i)) + '\n') else: output.write(str(int(i)) + ',' + ' ') inumber += 1 output.write('\n' + '\n' + '; cm delta min' + '\n') linenumber = 0 for line in averagemin: output.write('CM_DELTA_MIN_ROW' + str("%02d" % linenumber) + ' ' + '=' + ' ') inumber = 0 for i in line: if inumber == (len(line) - 1): output.write(str(int(i)) + '\n') else: output.write(str(int(i)) + ',' + ' ') inumber += 1 linenumber += 1 if havekey == 1: inumber = 0 output.write('CM_DELTA_MIN_KEY' + ' ' + '=' + ' ') for i in keymin: if i == 0: i = 5 else: pass if inumber == (len(keymin) - 1): output.write(str(int(i)) + '\n') else: output.write(str(int(i)) + ',' + ' ') inumber += 1 output.close() averagedata = [] sampleplotdata = [] #def makecpselfspec(samplecpdata,maxspec,minspec): if len(filedirectorycatchdata( inputfiledirectory)) == 1 and len(usefuldatafile) != 0: dataoutput = codecs.open(alldataoutputfile + 'alldata.csv', 'w+') d = 
filedirectorycatchdata(inputfiledirectory) for i in range(len(d[0])): dataoutput.write(str(usefuldatafile[i]) + ',') dataoutput.write( ((str((np.array(d[0][i]).flatten()).tolist())).replace( '[', '')).replace(']', '')) dataoutput.write('\n') sampleplotdata.append((np.array(d[0][i]).flatten()).tolist()) dataoutput.close() averagedata.append(makespec(d[0], vatargetpercent)) elif len(filedirectorycatchdata( inputfiledirectory)) == 2 and len(usefuldatafile) != 0: dataoutput = codecs.open(alldataoutputfile + 'alldata.csv', 'w+') d = filedirectorycatchdata(inputfiledirectory) for i in range(len(d[0])): dataoutput.write(str(usefuldatafile[i]) + ',') dataoutput.write( ((str(((np.array(d[0][i]).flatten()).tolist()) + ((np.array(d[1][i]).flatten()).tolist()))).replace( '[', '')).replace(']', '')) dataoutput.write('\n') sampleplotdata.append(((np.array(d[0][i]).flatten()).tolist()) + ((np.array(d[1][i]).flatten()).tolist())) dataoutput.close() averagedata.append(makespec(d[0], vatargetpercent)) averagedata.append(makekeyspec(d[1], keytargetpercent)) writeaveragemaxminarray(outputaveragefile, averagedata, outputmaxsetspec, outputminsetspec) print('<<<<<<<<<样品数据已保存在Tarnsitdata文件夹>>>>>>>>>') return sampleplotdata
    return res


def get_comments(post_id, offset=0):
    r = requests.get(
        COMMENTS_ENDPOINT.format(owner_id=OWNER_ID, post_id=post_id, offset=offset))
    try:
        res = r.json()['response'][1:]
    except KeyError:
        res = []
    return res


f = codecs.open('out.txt', 'w', encoding='utf-8')
count = 0
while count < TOTAL_POSTS:
    posts = get_posts(offset=count)
    for p in posts:
        count += 1
        dt = datetime.fromtimestamp(p['date'])
        post_id = p['id']
        print(count, file=f)
        print(dt.strftime('%Y-%m-%d'), file=f)
        print(p['text'], file=f)
        print(POST_URL_TEPMPLATE.format(owner_id=OWNER_ID, post_id=post_id), file=f)
        comments = get_comments(post_id)
        for c in comments:
        case = 3
    else:
        print("Invalid")
        inputpath = ""

while (outputpath == ""):
    outputpath = input("Output Path: ")
    outputpath = outputpath.replace('"', '')
outputpath = Path(outputpath)
if not outputpath.exists():
    outputpath.mkdir()

# Input file Generator
words = []
uniquewords = []
if case == 1:
    with codecs.open(inputpath, "r", encoding='utf-8-sig') as labfile:
        contents = labfile.read()
        contents = contents.split(" ")
        for i in range(len(contents)):
            if contents[i] not in punctuation:
                words.append(contents[i])
elif case == 2:
    with codecs.open(inputpath, "r", encoding='utf-8-sig') as txtfile:
        for contents in txtfile:
            contents = contents.rstrip("\n")
            words.append(contents)
elif case == 3:
    for entry in inputpath.iterdir():
        if entry.suffix == '.lab':
            with codecs.open(entry, "r", encoding='utf-8-sig') as labfile:
                contents = labfile.read()
from codecs import open
from setuptools import setup

try:
    from azure_bdist_wheel import cmdclass
except ImportError:
    from distutils import log as logger
    logger.warn("Wheel is not available, disabling bdist_wheel hook")
    cmdclass = {}

VERSION = "2.0.46"

# If we have source, validate that our version numbers match
# This should prevent uploading releases with mismatched versions.
try:
    with open('azure/cli/core/__init__.py', 'r', encoding='utf-8') as f:
        content = f.read()
except OSError:
    pass
else:
    import re
    import sys

    m = re.search(r'__version__\s*=\s*[\'"](.+?)[\'"]', content)
    if not m:
        print('Could not find __version__ in azure/cli/core/__init__.py')
        sys.exit(1)
    if m.group(1) != VERSION:
        print('Expected __version__ = "{}"; found "{}"'.format(VERSION, m.group(1)))
        sys.exit(1)
import codecs
from setuptools import setup

with codecs.open('README.rst', encoding='utf-8') as f:
    long_description = f.read()

setup(
    name="shadowsocks",
    version="2.6.9",
    license='http://www.apache.org/licenses/LICENSE-2.0',
    description="A fast tunnel proxy that help you get through firewalls",
    author='clowwindy',
    author_email='*****@*****.**',
    url='https://github.com/shadowsocks/shadowsocks',
    packages=['shadowsocks', 'shadowsocks.crypto'],
    package_data={'shadowsocks': ['README.rst', 'LICENSE']},
    install_requires=[],
    entry_points="""
    [console_scripts]
    sslocal = shadowsocks.local:main
    ssserver = shadowsocks.server:main
    """,
    classifiers=[
        'License :: OSI Approved :: Apache Software License',
        'Programming Language :: Python :: 2',
        'Programming Language :: Python :: 2.6',
        'Programming Language :: Python :: 2.7',
        'Programming Language :: Python :: 3',
        'Programming Language :: Python :: 3.3',
        'Programming Language :: Python :: 3.4',
        'Programming Language :: Python :: Implementation :: CPython',
# (C) Datadog, Inc. 2019-present
# All rights reserved
# Licensed under a 3-clause BSD style license (see LICENSE)
from codecs import open  # To use a consistent encoding
from os import path

from setuptools import setup

HERE = path.dirname(path.abspath(__file__))

# Get version info
ABOUT = {}
with open(path.join(HERE, 'datadog_checks', 'clickhouse', '__about__.py')) as f:
    exec(f.read(), ABOUT)

# Get the long description from the README file
with open(path.join(HERE, 'README.md'), encoding='utf-8') as f:
    long_description = f.read()


def get_dependencies():
    dep_file = path.join(HERE, 'requirements.in')
    if not path.isfile(dep_file):
        return []
    with open(dep_file, encoding='utf-8') as f:
        return f.readlines()


CHECKS_BASE_REQ = 'datadog-checks-base>=15.0.0'
def getResourceFor(self, request):
    uri = request.uri
    uri = uri.split("?", 1)[0]
    uri = uri.split("#", 1)[0]

    if uri == '/':
        # This serves the message, but also throws an exception; can't understand why...
        result = static.Data(
            '<html>In production you would now be on https://clipperz.is/</html>',
            'text/html')
    elif uri.startswith('/json') or uri.startswith('/dump'):
        resource.prepath = ['app']
        result = resource.getChildForRequest(self.resource, request)
    elif uri.startswith('/payment'):
        resource.prepath = ['payment']
        result = resource.getChildForRequest(self.resource, request)
    elif uri == '/favicon.ico':
        return
    else:
        pathParts = uri.split('/')
        version = pathParts[1]

        if pathParts[2].startswith('index.'):
            print("-> index")
            contentType = 'text/html'
            absoluteFilePath = os.path.join(projectTargetDir(), 'dev', version, pathParts[2])
            # print("INDEX.HTML absolute path " + str(absoluteFilePath))
            result = static.File(absoluteFilePath, contentType)
        elif pathParts[2].endswith('.webapp'):
            print("-> webapp")
            contentType = 'application/x-web-app-manifest+json'
            absoluteFilePath = os.path.join(projectBaseDir(), 'frontend', version, 'properties', pathParts[2])
            result = static.File(absoluteFilePath, contentType)
        # elif pathParts[2].endswith('.appcache'):
        elif pathParts[2].endswith('.appcache_disabled'):
            print("-> appcache")
            contentType = 'text/cache-manifest'
            absoluteFilePath = os.path.join(projectBaseDir(), 'frontend', version, 'properties', pathParts[2])
            fileContent = codecs.open(absoluteFilePath, 'r', 'utf-8').read()
            # fileContent = fileContent.replace('@application.version@', str(uuid.uuid1()))
            fileContent = fileContent.replace('@application.version@', str(round(time.time())))
            result = static.Data(str(fileContent), contentType)
        else:
            # http://homer.local:8888/beta/css/clipperz/images/loginInfoBackground.png
            # pathParts: ['', 'beta', 'css', 'clipperz', 'images', 'loginInfoBackground.png']
            try:
                imagePathIndex = pathParts.index('images')
                resourceType = 'images'
                for _ in range(2, imagePathIndex):
                    del pathParts[2]
            except:
                resourceType = pathParts[2]

            basePath = projectBaseDir() + '/frontend'
            if resourceType == 'images':
                fileExtension = os.path.splitext(uri)[1]
                if fileExtension == '.png':
                    # print("-> image - png")
                    contentType = 'image/png'
                elif fileExtension == '.jpg':
                    # print("-> image - jpg")
                    contentType = 'image/jpeg'
                elif fileExtension == '.gif':
                    # print("-> image - gif")
                    contentType = 'image/gif'
                else:
                    print("ERROR - unknown image extension: " + fileExtension)
                absoluteFilePath = basePath + '/'.join(pathParts)
            else:
                resourceType = pathParts[2]
                if resourceType == 'css':
                    # print("-> css")
                    contentType = 'text/css'
                elif resourceType == 'js':
                    # print("-> js")
                    contentType = 'text/javascript'
                else:
                    # print("-> text/html")
                    contentType = 'text/html'
                absoluteFilePath = basePath + uri
            result = static.File(absoluteFilePath, contentType)

    # print("RESULT\n" + str(result))
    return result
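The extension-to-content-type chain above can also be driven by the standard-library mimetypes module; this is a sketch of that alternative, not how the Clipperz resource handler actually resolves types:

import mimetypes

for name in ('logo.png', 'photo.jpg', 'spinner.gif', 'style.css', 'app.js'):
    content_type, _ = mimetypes.guess_type(name)
    print(name, '->', content_type or 'application/octet-stream')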
# (C) Datadog, Inc. 2018
# All rights reserved
# Licensed under a 3-clause BSD style license (see LICENSE)
from codecs import open  # To use a consistent encoding
from os import path

from setuptools import setup

HERE = path.abspath(path.dirname(__file__))

# Get version info
ABOUT = {}
with open(path.join(HERE, "datadog_checks", "kube_proxy", "__about__.py")) as f:
    exec(f.read(), ABOUT)

# Get the long description from the README file
with open(path.join(HERE, 'README.md'), encoding='utf-8') as f:
    long_description = f.read()

CHECKS_BASE_REQ = 'datadog_checks_base'

setup(
    name='datadog-kube-proxy',
    version=ABOUT["__version__"],
    description='The kube_proxy Check',
    long_description=long_description,
    long_description_content_type='text/markdown',
    keywords='datadog agent kube_proxy check',
    # The project's main homepage.
import codecs

from setuptools import find_packages, setup

VERSION_FILE = "pytest_never_sleep/_version.py"

with codecs.open("README.md", "r", "utf-8") as fh:
    long_description = fh.read()

setup(
    name="pytest-never-sleep",
    # use_scm_version={
    #     "write_to": VERSION_FILE,
    #     "local_scheme": "dirty-tag",
    # },
    # setup_requires=["setuptools_scm==5.0.2"],
    author="Denis Korytkin",
    author_email="*****@*****.**",
    description="pytest plugin helps to avoid adding tests without mock `time.sleep`",
    long_description=long_description,
    long_description_content_type="text/markdown",
    url="https://github.com/DKorytkin/pytest-never-sleep",
    keywords=["py.test", "pytest", "without sleep", "mock time.sleep"],
    py_modules=[
        "pytest_never_sleep.plugin",
        "pytest_never_sleep.hooks",
        "pytest_never_sleep.never_sleep",
    ],
    packages=find_packages(exclude=["tests*"]),
    install_requires=["pytest>=3.5.1"],
def tagging():
    # Load the config file
    with open('./config.yml') as file_config:
        config = yaml.load(file_config)
    feature_names = config['model_params']['feature_names']  # read the feature names
    use_char_feature = config['model_params']['use_char_feature']

    # Initialise embedding shapes and dropouts (pre-trained embeddings are also initialised here)
    feature_weight_shape_dict, feature_weight_dropout_dict, \
        feature_init_weight_dict = dict(), dict(), dict()
    for feature_name in feature_names:
        feature_weight_shape_dict[feature_name] = \
            config['model_params']['embed_params'][feature_name]['shape']
        feature_weight_dropout_dict[feature_name] = \
            config['model_params']['embed_params'][feature_name]['dropout_rate']
        path_pre_train = config['model_params']['embed_params'][feature_name]['path']
        if path_pre_train:  # the feature has a pre-trained embedding
            with open(path_pre_train, 'rb') as file_r:
                feature_init_weight_dict[feature_name] = pickle.load(file_r)
    # char embedding shape
    if use_char_feature:
        feature_weight_shape_dict['char'] = \
            config['model_params']['embed_params']['char']['shape']
        conv_filter_len_list = config['model_params']['conv_filter_len_list']
        conv_filter_size_list = config['model_params']['conv_filter_size_list']
    else:
        conv_filter_len_list = None
        conv_filter_size_list = None

    # Load the vocabularies
    print("Loading vocabularies ...")
    path_vocs = []
    if use_char_feature:
        path_vocs.append(config['data_params']['voc_params']['char']['path'])
    for feature_name in feature_names:
        path_vocs.append(config['data_params']['voc_params'][feature_name]['path'])
    path_vocs.append(config['data_params']['voc_params']['label']['path'])
    vocs = load_vocs(path_vocs)

    # Load the test data
    print("Loading the test set ...")
    sep_str = config['data_params']['sep']
    assert sep_str in ['table', 'space']
    sep = '\t' if sep_str == 'table' else ' '
    max_len = config['model_params']['sequence_length']
    word_len = config['model_params']['word_length']
    data_dict = init_data(
        path=config['data_params']['path_test'], feature_names=feature_names, sep=sep,
        vocs=vocs, max_len=max_len, model='test', use_char_feature=use_char_feature,
        word_len=word_len)

    # Load the model
    model = SequenceLabelingModel(
        sequence_length=config['model_params']['sequence_length'],
        nb_classes=config['model_params']['nb_classes'],
        nb_hidden=config['model_params']['bilstm_params']['num_units'],
        num_layers=config['model_params']['bilstm_params']['num_layers'],
        feature_weight_shape_dict=feature_weight_shape_dict,
        feature_init_weight_dict=feature_init_weight_dict,
        feature_weight_dropout_dict=feature_weight_dropout_dict,
        dropout_rate=config['model_params']['dropout_rate'],
        nb_epoch=config['model_params']['nb_epoch'],
        feature_names=feature_names,
        batch_size=config['model_params']['batch_size'],
        train_max_patience=config['model_params']['max_patience'],
        use_crf=config['model_params']['use_crf'],
        l2_rate=config['model_params']['l2_rate'],
        rnn_unit=config['model_params']['rnn_unit'],
        learning_rate=config['model_params']['learning_rate'],
        use_char_feature=use_char_feature,
        conv_filter_size_list=conv_filter_size_list,
        conv_filter_len_list=conv_filter_len_list,
        word_length=word_len,
        path_model=config['model_params']['path_model'])
    saver = tf.train.Saver()
    saver.restore(model.sess, config['model_params']['path_model'])

    # Tag the test set
    viterbi_sequences = model.predict(data_dict)

    # Write the results to file
    label_voc = dict()
    for key in vocs[-1]:
        label_voc[vocs[-1][key]] = key
    with codecs.open(config['data_params']['path_test'], 'r', encoding='utf-8') as file_r:
        sentences = file_r.read().strip().split('\n\n')
    file_result = codecs.open(
        config['data_params']['path_result'], 'w', encoding='utf-8')
    for i, sentence in enumerate(sentences):
        for j, item in enumerate(sentence.split('\n')):
            if j < len(viterbi_sequences[i]):
                file_result.write('%s\t%s\n' % (item, label_voc[viterbi_sequences[i][j]]))
            else:
                file_result.write('%s\tO\n' % item)
        file_result.write('\n')
    file_result.close()
from setuptools import setup, find_packages
from codecs import open
from os import path

__version__ = '0.0.21'

here = path.abspath(path.dirname(__file__))

# Get the long description from the README file
with open(path.join(here, 'README.md'), encoding='utf-8') as f:
    long_description = f.read()

# get the dependencies and installs
with open(path.join(here, 'requirements.txt'), encoding='utf-8') as f:
    all_reqs = f.read().split('\n')

install_requires = [x.strip() for x in all_reqs if 'git+' not in x]
dependency_links = [
    x.strip().replace('git+', '') for x in all_reqs if x.startswith('git+')
]

setup(
    name='biocircuits',
    version=__version__,
    description='Python utilities for the Caltech course BE 150: Design Principles of Genetic Circuits',
    long_description=long_description,
    url='https://github.com/justinbois/biocircuits',
    download_url='https://github.com/justinbois/biocircuits/tarball/' + __version__,
    license='BSD',
from sklearn import metrics
import glob
import errno
import codecs

path1 = 'C:/Users/NoT/Desktop/ML/Project/Stylogenetics/stylogenetics/Hasan Mahbub/*.doc'
path2 = 'C:/Users/NoT/Desktop/ML/Project/Stylogenetics/stylogenetics/MZI/MZI/*.doc'
path3 = 'C:/Users/NoT/Desktop/ML/Project/Stylogenetics/stylogenetics/Nir Shondhani/Nir Shondhani/*.doc'

labels, texts = [], []
val_x, val_y = [], []

files = glob.glob(path1)
for name in files:
    try:
        with codecs.open(name, 'r', encoding='utf-8') as f:
            str = f.read()
            # str = re.sub(' +', ' ', str)
            str = " ".join(str.split())
            labels.append("hm")
            texts.append(str)
    except IOError as exc:
        if exc.errno != errno.EISDIR:
            raise

files = glob.glob(path2)
for name in files:
    try:
        with codecs.open(name, 'r', encoding='utf-8') as f:
            str = f.read()
            # str = re.sub(' +', ' ', str)
            str = " ".join(str.split())
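Note that str is reused above as a variable name, shadowing the builtin. A small helper (a sketch, not part of the original project) keeps the read-and-normalise step reusable and avoids the shadowing:

import codecs


def read_normalised(path, encoding='utf-8'):
    """Read a text file and collapse all runs of whitespace into single spaces."""
    with codecs.open(path, 'r', encoding=encoding) as f:
        return " ".join(f.read().split())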
objectnames = gatherList(objectnames, objectlist, name='objects', required=False)

# authenticate
apiauth(vip=vip, username=username, domain=domain, password=password, useApiKey=useApiKey)

cluster = api('get', 'cluster')
print('\nGathering Job Info from %s...\n' % cluster['name'])

# outfile
now = datetime.now()
dateString = now.strftime("%Y-%m-%d")
outfile = 'protectedObjectReport-%s-%s.csv' % (cluster['name'], dateString)
f = codecs.open(outfile, 'w')

# gather info
sources = api('get', 'protectionSources?includeVMFolders=true')
policies = api('get', 'data-protect/policies', v=2)['policies']
jobs = api('get', 'data-protect/protection-groups?includeTenants=true', v=2)

# headings
f.write('Cluster Name,Job Name,Environment,Object Name,Object Type,Object Size (MiB),Parent,Policy Name,Policy Link,Archive Target,Direct Archive,Frequency (Minutes),Last Backup,Last Status,Last Run Type,Job Paused,Indexed,Start Time,Time Zone,QoS Policy,Priority,Full SLA,Incremental SLA\n')

report = []

for job in sorted(jobs['protectionGroups'], key=lambda j: j['name']):
    objects = {}
# d = path.dirname(__file__)
d = "./source/word_cloud-master/examples"
stopwords_path = d + '/wc_cn/stopwords_cn_en.txt'
# Chinese fonts must be set
font_path = d + '/fonts/SourceHanSerif/SourceHanSerifK-Light.otf'

# the paths to save the word cloud images
imgname1 = d + '/wc_cn/LuXun.jpg'
imgname2 = d + '/wc_cn/LuXun_colored.jpg'

# read the mask / color image taken from
back_coloring = imread(d + '/wc_cn/LuXun_color.jpg')

# Read the whole text.
text = codecs.open(d + '/wc_cn/CalltoArms.txt', "r", "utf-8").read()

# userdict_list = ['孔乙己']


# The function for processing text with HanLP
def pyhanlp_processing_txt(text, isUseStopwordsOfHanLP=True):
    CustomDictionary = JClass("com.hankcs.hanlp.dictionary.CustomDictionary")
    for word in userdict_list:
        CustomDictionary.add(word)

    mywordlist = []
    HanLP.Config.ShowTermNature = False
    CRFnewSegment = HanLP.newSegment("viterbi")
def get_precision():
    """Precision calculation, based on person names (the nrn label)."""
    with open('./config.yml') as file_config:
        config = yaml.load(file_config)
    f_answer = codecs.open(config["data_params"]["path_answer"], encoding="utf-8")
    f_result = codecs.open(config["data_params"]["path_result"], encoding="utf-8")

    data = f_answer.read()
    f_answer.close()
    rows_answer = data.split("\n")
    items_answer = [[i.split(" ")[0], i.split(" ")[len(i.split(" ")) - 1]]
                    for i in rows_answer]

    data = f_result.read()
    f_result.close()
    rows_result = data.split("\n")
    items_result = [[i.split(" ")[0], i.split(" ")[len(i.split(" ")) - 1]]
                    for i in rows_result]

    precision_num = 0.0
    recall_num = 0.0
    correct_num = 0.0
    for items in items_result:
        # print(items[0], items[1])
        try:
            if items[1][0] == "B" and items[1][2:] == "nrn":
                precision_num += 1
        except:
            pass
    for items in items_answer:
        # print(items)
        try:
            if items[1][0] == "B" and items[1][2:] == "nrn":
                recall_num += 1
        except:
            pass

    i = 0
    while i < len(items_answer):
        if items_result[i][1] != "" and items_answer[i][1] != "":
            if items_result[i][1][0] == "B" and items_result[i][1][0] == items_answer[i][1][0]:
                j = i
                while j < len(items_answer):
                    if items_answer[j][1][0] != "E":
                        j += 1
                    else:
                        break
                if items_result[j][1][0] == "E" and items_answer[j][1][2:] == "nrn":
                    # print(items_answer[j][1], items_result[j][1])
                    correct_num += 1
                    i = j
        i += 1

    p = correct_num / precision_num
    r = correct_num / recall_num
    print("nrn")
    print("p:")
    print(p)
    print("r:")
    print(r)
    print("f:")
    print(2 * p * r / (p + r))


# if __name__ == '__main__':
#     create_testset()
#     tagging()        # tag the test set
#     get_precision()
#     get_indicator()
def read(*parts):
    with codecs.open(join(here, *parts), 'r') as fp:
        return fp.read()
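A minimal, self-contained sketch of how a helper like read() is commonly wired into setup(); the project name, version and README filename below are assumptions, not taken from the original:

import codecs
from os.path import abspath, dirname, join

from setuptools import setup

here = abspath(dirname(__file__))


def read(*parts):
    with codecs.open(join(here, *parts), 'r') as fp:
        return fp.read()


setup(
    name='example-package',                # hypothetical
    version='0.1.0',                       # hypothetical
    long_description=read('README.rst'),   # assumes a README.rst next to setup.py
)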
import codecs

from setuptools import setup, find_packages

with codecs.open('README.md', 'r', 'utf8') as reader:
    long_description = reader.read()

with codecs.open('requirements.txt', 'r', 'utf8') as reader:
    install_requires = list(map(lambda x: x.strip(), reader.readlines()))

setup(
    name='keras-gpt-2',
    version='0.13.0',
    packages=find_packages(),
    url='https://github.com/CyberZHG/keras-gpt-2',
    license='MIT',
    author='CyberZHG',
    author_email='*****@*****.**',
    description='GPT-2',
    long_description=long_description,
    long_description_content_type='text/markdown',
    install_requires=install_requires,
    classifiers=(
        "Programming Language :: Python :: 3",
        "License :: OSI Approved :: MIT License",
        "Operating System :: OS Independent",
    ),
)
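The requirements.txt parsing above keeps every stripped line, including blanks and comments. A slightly more defensive variant (an assumption, not how keras-gpt-2 actually does it) filters those out:

import codecs


def load_requirements(path='requirements.txt'):
    """Return non-empty, non-comment lines from a pip requirements file."""
    with codecs.open(path, 'r', 'utf8') as reader:
        return [
            line.strip()
            for line in reader
            if line.strip() and not line.strip().startswith('#')
        ]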
See:
https://packaging.python.org/en/latest/distributing.html
https://github.com/pypa/sampleproject
"""
# Always prefer setuptools over distutils
from setuptools import setup, find_packages
# To use a consistent encoding
from codecs import open
from os import path

here = path.abspath(path.dirname(__file__))

# Get the long description from the README file
with open(path.join(here, 'README.rst'), encoding='utf-8') as f:
    long_description = f.read()

setup(name='scipion-em-empiar',
      version='3.0.1',
      description='A Scipion plugin to make depositions to EMPIAR',
      long_description=long_description,
      url='https://github.com/scipion-em/scipion-em-empiar',
      author='I2PC',
      author_email='*****@*****.**',
      keywords='scipion empiar scipion-3.0',
      packages=find_packages(),
      install_requires=['empiar-depositor', 'jsonschema', 'scipion-em'],
      package_data={
          'empiar': [
              'EMPIAR_logo.png',
              'empiar_deposition.schema.json',
def main():
    global SUBJECT

    content = ""
    test_counts = []
    attachments = {}

    updateproc = subprocess.Popen(
        "cd /opt/sqlmap/ ; python /opt/sqlmap/sqlmap.py --update",
        shell=True,
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE)
    stdout, stderr = updateproc.communicate()
    if stderr:
        failure_email("Update of sqlmap failed with error:\n\n%s" % stderr)

    regressionproc = subprocess.Popen(
        "python /opt/sqlmap/sqlmap.py --live-test",
        shell=True,
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        close_fds=False)
    stdout, stderr = regressionproc.communicate()
    if stderr:
        failure_email("Execution of regression test failed with error:\n\n%s" % stderr)

    failed_tests = re.findall(
        "running live test case: (.+?) \((\d+)\/\d+\)[\r]*\n.+test failed (at parsing items: (.+))?\s*\- scan folder: (\/.+) \- traceback: (.*?)( - SQL injection not detected)?[\r]*\n",
        stdout, re.M)

    for failed_test in failed_tests:
        title = failed_test[0]
        test_count = int(failed_test[1])
        parse = failed_test[3] if failed_test[3] else None
        output_folder = failed_test[4]
        traceback = False if failed_test[5] == "False" else bool(failed_test[5])
        detected = False if failed_test[6] else True

        test_counts.append(test_count)

        console_output_file = os.path.join(output_folder, "console_output")
        log_file = os.path.join(output_folder, TARGET, "log")
        traceback_file = os.path.join(output_folder, "traceback")

        if os.path.exists(console_output_file):
            console_output_fd = codecs.open(console_output_file, "rb", "utf8")
            console_output = console_output_fd.read()
            console_output_fd.close()
            attachments[test_count] = str(console_output)

        if os.path.exists(log_file):
            log_fd = codecs.open(log_file, "rb", "utf8")
            log = log_fd.read()
            log_fd.close()

        if os.path.exists(traceback_file):
            traceback_fd = codecs.open(traceback_file, "rb", "utf8")
            traceback = traceback_fd.read()
            traceback_fd.close()

        content += "Failed test case '%s' (#%d)" % (title, test_count)

        if parse:
            content += " at parsing: %s:\n\n" % parse
            content += "### Log file:\n\n"
            content += "%s\n\n" % log
        elif not detected:
            content += " - SQL injection not detected\n\n"
        else:
            content += "\n\n"

        if traceback:
            content += "### Traceback:\n\n"
            content += "%s\n\n" % str(traceback)

        content += "#######################################################################\n\n"

    end_string = "Regression test finished at %s" % time.strftime("%H:%M:%S %d-%m-%Y", time.gmtime())

    if content:
        content += end_string
        SUBJECT = "Failed %s (%s)" % (SUBJECT, ", ".join("#%d" % count for count in test_counts))

        msg = prepare_email(content)
        for test_count, attachment in attachments.items():
            attachment = MIMEText(attachment)
            attachment.add_header("Content-Disposition", "attachment",
                                  filename="test_case_%d_console_output.txt" % test_count)
            msg.attach(attachment)
        send_email(msg)
    else:
        SUBJECT = "Successful %s" % SUBJECT
        msg = prepare_email("All test cases were successful\n\n%s" % end_string)
        send_email(msg)
ITEM_PIPELINES = {
    'FirmCrawler.pipelines.FirmcrawlerPipeline': 300,
}

LOG_LEVEL = 'INFO'

# Crawl responsibly by identifying yourself (and your website) on the user-agent
#USER_AGENT = 'FirmCrawler (+http://www.yourdomain.com)'

# Obey robots.txt rules
ROBOTSTXT_OBEY = False

# get mongodb info
import codecs
import ConfigParser

config = ConfigParser.ConfigParser()
configfile = r'./scrapy.cfg'
config.readfp(codecs.open(configfile, 'r', 'utf-8'))
MONGO_URI = config.get('mongo_cfg', "MONGO_IP")
MONGO_PORT = config.get('mongo_cfg', "MONGO_PORT")
MONGO_DATABASE = config.get('mongo_cfg', "MONGO_DATABASE")
MONGO_COLLECTION = config.get('mongo_cfg', "MONGO_SCRAPY_COLLECTION_NAME")

# #edit by @zhangguodong
# dirs_root = config.get('mogo_cfg', "FIRMWARE_STORE_PATH")
# #file_sile = config.get('mongo_cfg', "")
# configfile = r'./CONFIG.cfg'

# Configure maximum concurrent requests performed by Scrapy (default: 16)
#CONCURRENT_REQUESTS = 32

# Configure a delay for requests for the same website (default: 0)
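Wrapping codecs.open() in readfp() is the Python 2 ConfigParser idiom; under Python 3 the standard-library configparser accepts an encoding directly, so a rough equivalent (section and option names copied from the snippet above) would look like this:

import configparser

config = configparser.ConfigParser()
config.read('./scrapy.cfg', encoding='utf-8')

MONGO_URI = config.get('mongo_cfg', 'MONGO_IP')
MONGO_PORT = config.get('mongo_cfg', 'MONGO_PORT')
MONGO_DATABASE = config.get('mongo_cfg', 'MONGO_DATABASE')
MONGO_COLLECTION = config.get('mongo_cfg', 'MONGO_SCRAPY_COLLECTION_NAME')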
from django.conf import settings

from django_bootstrap import bootstrap
bootstrap(__file__)

from application.models import SubmissionInfo, Applicant, Major, MajorPreference, PersonalInfo
from confirmation.models import AdmissionMajorPreference, AdmissionWaiver

from utils import get_submitted_applicant_dict

applicants = get_submitted_applicant_dict({
    'preference': MajorPreference,
    'personal_info': PersonalInfo,
    'submission_info': SubmissionInfo,
})

f = codecs.open(file_name, encoding="utf-8", mode="w")

pref = {}

uses_nat_id = '--nat' in sys.argv

SUBMISSION_RANK = {1: 2,  # doc by mail
                   2: 1,  # online
                   3: 3}  # all by mail

for applicantion_id in sorted(applicants.keys()):
    applicant = applicants[applicantion_id]

    #if not applicant.submission_info.doc_reviewed_complete:
    #    continue

    if AdmissionWaiver.is_waived(applicant):
## List of words to exclude from each court case: word_list
## TODO: Q. What criterion was used to split word_list? Is starting from a human-chosen split really the right preprocessing for word2vec?
with open(fileName_dnusing_wordSet, 'r') as infile:
    word_list = [line.rstrip() for line in infile]

## Build a list containing only the titles
with open(fileName_title, 'r') as infile2:
    title_list = [line.rstrip() for line in infile2]

## Build a list containing only the keyword numbers
# Do we know why this part raises an error, and what the reason is?
try:
    with open(fileName_keyNum) as infile3:
        keyNum_list = [line.rstrip() for line in infile3]
except UnicodeDecodeError:
    ## TODO: Why does loading this text file raise an error, and why does that mean codecs has to be used?
    with codecs.open(fileName_keyNum, "r", "utf-8") as infile3:
        keyNum_list = [line.rstrip() for line in infile3]


## From the parsed corpus, extract only the nouns, build per-word counts as a dictionary, and append
## TODO: Is extracting only nouns from this data really the right approach? Can the relations or similarity between nouns be judged correctly this way?
def append_noun_words(corpus):
    noun_words = ['NNG', 'NNB', 'NP']  # train only on common nouns, proper nouns and pronouns // why this choice?
    results = []
    for text in corpus:
        for noun_word in noun_words:
            if noun_word in text[1]:
                results.append(text[0])
    return results
# plt.imshow(wc)    # display the word cloud
# plt.axis('off')   # hide the axes
# plt.show()        # show the image

# Imports
import re                        # regular expressions
import collections               # word-frequency counting
import codecs                    # read files with an explicit encoding
import numpy as np               # numerical processing
import jieba                     # Chinese word segmentation
import wordcloud                 # word-cloud rendering
from PIL import Image            # image handling
import matplotlib.pyplot as plt  # plotting

# Read the file
text = codecs.open(r'C:\Users\think\Desktop\情感分析\doc\answer.txt', 'r', encoding='utf-8').read()
print(text)
#fn = open(r'C:\Users\think\Desktop\情感分析\answer.txt')  # open the file
#string_data = fn.read()  # read the whole file
#fn.close()  # close the file

# Text preprocessing
pattern = re.compile(u'\t|\n|\.|-|:|;|\)|\(|\?|"')  # characters to strip out
string_data = re.sub(pattern, '', text)  # remove the matching characters

# Word segmentation (segment the cleaned text rather than the raw text)
seg_list_exact = jieba.cut(string_data, cut_all=False)  # accurate-mode segmentation
object_list = []
remove_words = [
    u'的', u',', u'和', u'是', u'随着', u'对于', u'对', u'等', u'能', u'都', u'。', u' ',
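The snippet is cut off before the collections import is used; a self-contained sketch of the word-frequency step it points towards (the sample text and stop-word list here are stand-ins, not from the original):

import collections

import jieba

sample_text = u'这是一个用来演示词频统计的简短示例文本,示例文本很短'  # stand-in sample text
stop_words = [u'的', u'是', u'一个', u',']                            # stand-in stop words

words = [w for w in jieba.cut(sample_text, cut_all=False)
         if w.strip() and w not in stop_words]
word_counts = collections.Counter(words)
print(word_counts.most_common(10))  # the ten most frequent words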
def from_file(self, filepath, encoding="utf-8"):
    """Read TAP file using `filepath` as source."""
    with codecs.open(filepath, encoding=encoding) as fp:
        for line in fp.readlines():
            self.parse_line(line.rstrip("\n\r"))
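readlines() loads the whole file before parsing; a streaming variant of the same loop iterates the file object directly. This is a sketch with a hypothetical stand-in class, since the class the method above belongs to is not shown:

import codecs


class TapLoader(object):
    """Hypothetical stand-in for the parser class the method above belongs to."""

    def parse_line(self, line):
        print(line)

    def from_file(self, filepath, encoding="utf-8"):
        # Iterating the file object avoids holding the whole file in memory.
        with codecs.open(filepath, encoding=encoding) as fp:
            for line in fp:
                self.parse_line(line.rstrip("\n\r"))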
def extract_header_body(filename):
    """Extract the text of the headers and body from a yaml headed file."""
    import codecs

    with codecs.open(filename, 'rb', 'utf-8') as f:
        metadata, content = frontmatter.parse(f.read())
    return metadata, content
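A short usage sketch, assuming a Markdown file with a YAML front-matter block; the filename and keys are made up, and the python-frontmatter package must be installed for frontmatter.parse to be available:

import codecs

import frontmatter

# Write a small hypothetical file so the example is self-contained.
with codecs.open('post.md', 'w', 'utf-8') as f:
    f.write('---\ntitle: Hello\ntags: [demo]\n---\nBody text here.\n')

metadata, content = extract_header_body('post.md')
print(metadata['title'])  # -> Hello
print(content)            # -> Body text here.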