def correct_INCA_format(fp):
    """Return a file object holding tab-separated records.

    The first line of *fp* is inspected: when it contains ``(`` the file is
    treated as INCA-flavoured.  In that case the first line is discarded,
    every remaining line is stripped of its ``(MLX::`` prefix, has its
    ``:``/space separators turned into tabs, is lower-cased, and has each
    ``)`` replaced by a newline.  The converted lines are written to a new
    ``StringIO`` which is returned.  Otherwise *fp* itself is returned.

    In both cases the returned object is rewound to position 0.

    :param fp: a seekable text file object.
    :returns: *fp*, or a new ``StringIO`` with the converted content.
    """
    fp.seek(0)
    # readline() consumes the first line; the loop below starts at line two.
    if '(' in fp.readline():
        fp_list = []
        for line in fp:
            line = (line.replace("(MLX::", "")
                        .replace(" : ", "\t")
                        .replace(" :", "\t")
                        .replace(" ", "\t")
                        .lower()
                        .strip()
                        .replace(")", "\n"))
            if "record-by" in line:
                # Normalise the value to one of the three known keywords.
                # The record must keep its newline terminator: writelines()
                # adds no separators, so without it the following record
                # would be glued onto this one.
                for kind in ("image", "vector", "dont-care"):
                    if kind in line:
                        line = "record-by\t" + kind + "\n"
            fp_list.append(line)
        fp = StringIO()
        fp.writelines(fp_list)
    fp.seek(0)
    return fp
def test_lazy_load_index():
    """Check the offsets and sizes LazyJSON reports for a one-key document."""
    stream = StringIO()
    dump({'wakka': 42}, stream)
    stream.seek(0)
    lazy = LazyJSON(stream)

    expected_offsets = {'wakka': 10, '__total__': 0}
    expected_sizes = {'wakka': 2, '__total__': 14}
    assert_equal(expected_offsets, lazy.offsets)
    assert_equal(expected_sizes, lazy.sizes)
def run_pip_main(cls, *args, **kwargs):
    """Invoke ``pip.main`` with *args*, optionally capturing its stdout.

    :param args: command-line arguments forwarded to ``pip.main`` as a list.
    :param check_output: keyword-only flag (default ``False``).  When true,
        everything pip prints to stdout is captured, echoed once with
        ``print`` and returned as a string.  Errors raised by pip in this
        mode are reported with ``traceback.print_exc()`` instead of
        propagating.
    :returns: the captured text when ``check_output`` is true, otherwise
        whatever ``pip.main`` returns.
    """
    import pip

    args = list(args)
    check_output = kwargs.pop('check_output', False)
    if not check_output:
        return pip.main(args)

    from contextlib import redirect_stdout
    from io import StringIO

    out = StringIO()
    try:
        # redirect_stdout puts back whatever stream was active before the
        # call, rather than unconditionally resetting to sys.__stdout__.
        with redirect_stdout(out):
            pip.main(args)
    except (Exception, SystemExit):
        # Best effort: pip may fail or call sys.exit(); report and carry on.
        # KeyboardInterrupt is deliberately allowed to propagate.
        traceback.print_exc()
    pipdata = out.getvalue()
    out.close()
    print(pipdata)
    return pipdata
def test_versions_and_sources_last(self):
    """Sorting puts [buildout] first and [sources], [versions] last.

    The remaining sections are expected in alphabetical order in between.
    """
    cfg = self.given_a_file_in_test_dir('buildout.cfg', '''\
[buildout]
[versions]
[sources]
[www]
[zzz]
[aaa]''')

    output = StringIO()
    # Close the input file deterministically instead of leaking the handle.
    with open(cfg) as cfg_file:
        sort(cfg_file, output)
    output.seek(0)

    expected = '''\
[buildout]
[aaa]
[www]
[zzz]
[sources]
[versions]
'''

    self.assertEqual(expected, output.read())
def test_opened_file(self):
    """A file object given to open_if_filename is readable and not flagged for closing."""
    stream = StringIO()
    stream.write('test_data')
    stream.seek(0)

    handle, should_close = open_if_filename(stream)

    assert not should_close
    eq_('test_data', handle.read())
def test_graph_bad_version_to_dot(self):
    """The dot output for all distributions, bad eggs included, lists the expected edges."""
    expected = (
        ('towel-stuff', 'bacon', 'bacon (<=0.2)'),
        ('grammar', 'bacon', 'truffles (>=1.2)'),
        ('choxie', 'towel-stuff', 'towel-stuff (0.1)'),
        ('banana', 'strawberry', 'strawberry (>=0.5)'),
    )

    names = self.DISTROS_DIST + self.DISTROS_EGG + self.BAD_EGGS
    dists = []
    for name in names:
        found = get_distribution(name, use_egg_info=True)
        self.assertNotEqual(found, None)
        dists.append(found)

    graph = depgraph.generate_graph(dists)
    dot_output = StringIO()
    depgraph.graph_to_dot(graph, dot_output)
    dot_output.seek(0)

    # The first and the last lines are skipped; every other line must be an edge.
    edge_lines = dot_output.readlines()[1:-1]
    matches = []
    for raw in edge_lines:
        text = raw[:-1] if raw[-1] == '\n' else raw
        edge = self.EDGE.match(text.strip())
        self.assertIsNot(edge, None)
        matches.append(edge.groups())

    self.checkLists(matches, expected)
def test_simple(self):
    """write_commit_patch emits the expected header, subject, body and trailer lines."""
    commit = Commit()
    identity = "Jelmer <*****@*****.**>"
    commit.committer = identity
    commit.author = identity
    commit.commit_time = 1271350201
    commit.author_time = 1271350201
    commit.commit_timezone = 0
    commit.author_timezone = 0
    commit.message = "This is the first line\nAnd this is the second line.\n"
    commit.tree = Tree().id

    patch = StringIO()
    write_commit_patch(patch, commit, "CONTENTS", (1, 1), version="custom")
    patch.seek(0)
    lines = patch.readlines()

    self.assertTrue(lines[0].startswith("From 0b0d34d1b5b596c928adc9a727a4b9e03d025298"))
    self.assertEqual(lines[1], "From: Jelmer <*****@*****.**>\n")
    self.assertTrue(lines[2].startswith("Date: "))

    expected_head = [
        "Subject: [PATCH 1/1] This is the first line\n",
        "And this is the second line.\n",
        "\n",
        "\n",
        "---\n",
    ]
    self.assertEqual(expected_head, lines[3:8])

    expected_tail = [
        "CONTENTS-- \n",
        "custom\n",
    ]
    self.assertEqual(expected_tail, lines[-2:])

    # diffstat may not be present
    if len(lines) >= 12:
        self.assertEqual(lines[8], " 0 files changed\n")
class LoggerTest(unittest.TestCase):
    """Tests for the Logger actor."""

    def setUp(self):
        # Each test logs into a fresh in-memory buffer.
        self._file_like = StringIO()
        started = Logger.start(file_like=self._file_like)
        self._logger = started.proxy()

    def tearDown(self):
        self._logger.stop()

    def _stop_logger(self):
        """Stop the logger and wait on its ``actor_stopped`` result."""
        self._logger.stop()
        self._logger.actor_stopped.get().wait()

    def test_records_are_recorded(self):
        """Handling one result leaves a non-empty message in the buffer."""
        details = {'description': 'desc', 'line_number': 3}
        record = MutationRecord('foo', 'foo.py', 'operator', details, None)
        result = TestResult(Outcome.KILLED, 'ok')

        self._logger.handle_result(record, result)
        self._stop_logger()

        self._file_like.flush()
        self._file_like.seek(0)
        logged = self._file_like.read()
        self.assertGreater(len(logged), 0)
def test_graph_disconnected_to_dot(self):
    """With skip_disconnected=False the dot output holds both edges and disconnected nodes."""
    dependencies_expected = (
        ('towel-stuff', 'bacon', 'bacon (<=0.2)'),
        ('grammar', 'bacon', 'truffles (>=1.2)'),
        ('choxie', 'towel-stuff', 'towel-stuff (0.1)'),
        ('banana', 'strawberry', 'strawberry (>=0.5)'),
    )
    disconnected_expected = ('cheese', 'bacon', 'strawberry')

    dists = []
    for name in self.DISTROS_DIST + self.DISTROS_EGG:
        found = get_distribution(name, use_egg_info=True)
        self.assertNotEqual(found, None)
        dists.append(found)

    graph = depgraph.generate_graph(dists)
    dot_output = StringIO()
    depgraph.graph_to_dot(graph, dot_output, skip_disconnected=False)
    dot_output.seek(0)
    lines = dot_output.readlines()

    # Split the output (minus its first and last line) into edge lines and
    # the node lines found inside the "subgraph disconnected" section.
    # Attribute lines of that section (containing " = ") are dropped, as are
    # the section's opening line and closing "}".
    edge_lines = []
    node_lines = []
    in_disconnected = False
    for raw in lines[1:-1]:
        if raw.startswith('subgraph disconnected'):
            in_disconnected = True
        elif in_disconnected and raw.startswith('}'):
            in_disconnected = False
        elif not in_disconnected:
            edge_lines.append(raw)
        elif ' = ' not in raw:
            node_lines.append(raw)

    dependencies_matches = []
    for raw in edge_lines:
        text = raw[:-1] if raw[-1] == '\n' else raw
        edge = self.EDGE.match(text.strip())
        self.assertIsNot(edge, None)
        dependencies_matches.append(edge.groups())

    disconnected_matches = []
    for raw in node_lines:
        text = raw[:-1] if raw[-1] == '\n' else raw
        disconnected_matches.append(text.strip('"'))

    self.checkLists(dependencies_matches, dependencies_expected)
    self.checkLists(disconnected_matches, disconnected_expected)
def test_exclusion_regexp(self):
    """Regexp exclusions drop the matching entries and record no hits for '/1/'."""
    entries = [
        {'MESSAGE': 'exclude this'},
        {'MESSAGE': 'message 1'},
        {'MESSAGE': 'exclude that'},
        {'MESSAGE': 'message 2'},
    ]

    # Feed the four entries, then an empty dict, through journal.Reader.get_next.
    (flexmock(journal.Reader)
     .should_receive('get_next')
     .and_return(entries[0])
     .and_return(entries[1])
     .and_return(entries[2])
     .and_return(entries[3])
     .and_return({}))

    exclusions = [
        {'MESSAGE': ['/1/']},  # shouldn't exclude anything
        {'MESSAGE': ['/exclude th/']},
        {'MESSAGE': ['/exclude/']},
    ]

    jfilter = JournalFilter(journal.Reader(), [EntryFormatter()],
                            default_exclusions=exclusions)
    sink = StringIO()
    jfilter.format(sink)
    sink.seek(0)
    lines = sink.read().splitlines()

    kept = [entries[1]] + [entries[3]]
    assert lines == [entry['MESSAGE'] for entry in kept]

    # Only the first statistic for the '/1/' exclusion is checked.
    for stat in jfilter.get_statistics():
        if stat.exclusion['MESSAGE'] == ['/1/']:
            assert stat.hits == 0
            break
def save(self, force_update=False, force_insert=False,
         thumb_size=(IMAGE_MAX_HEIGHT, IMAGE_MAX_WIDTH)):
    """Save the article.

    The slug is always regenerated from the title.  When the image has
    changed and is set, a PNG thumbnail no larger than *thumb_size* is
    rendered in memory and stored on the ``thumbnail`` field before the
    model itself is saved.

    :param force_update: forwarded to the parent ``save`` by keyword.
    :param force_insert: forwarded to the parent ``save`` by keyword.
    :param thumb_size: size tuple handed to ``Image.thumbnail``.
    """
    from io import BytesIO

    self.slug = slugify(self.title)

    if has_changed(self, 'image') and self.image:
        # TODO : delete old image
        image = Image.open(self.image)

        if image.mode not in ('L', 'RGB'):
            image = image.convert('RGB')

        image.thumbnail(thumb_size, Image.ANTIALIAS)

        # PNG data is binary, so it must go into a bytes buffer.
        temp_handle = BytesIO()
        image.save(temp_handle, 'png')

        # save to the thumbnail field
        suf = SimpleUploadedFile(os.path.split(self.image.name)[-1],
                                 temp_handle.getvalue(),
                                 content_type='image/png')
        self.thumbnail.save('{}.png'.format(suf.name), suf, save=False)

    # Pass the flags by keyword: this method declares them in the opposite
    # order to the parent, so positional forwarding would swap them.  They
    # are forwarded on every path, not only when a thumbnail was rebuilt.
    super().save(force_insert=force_insert, force_update=force_update)
def check_cpaste(code, should_fail=False):
    """Feed *code* to the 'cpaste' magic through a fake stdin.

    When *should_fail* is false, no traceback header may be printed and the
    code must have set ``code_ran`` in the user namespace.  When it is true,
    a traceback header is expected instead.
    """
    ip.user_ns['code_ran'] = False

    fake_stdin = StringIO()
    if not hasattr(fake_stdin, 'encoding'):
        # IPython expects stdin to have an encoding attribute
        fake_stdin.encoding = None
    fake_stdin.write(code)
    fake_stdin.write('\n--\n')
    fake_stdin.seek(0)

    original_stdin = sys.stdin
    sys.stdin = fake_stdin
    try:
        if should_fail:
            context = tt.AssertPrints
        else:
            context = tt.AssertNotPrints

        with context("Traceback (most recent call last)"):
            ip.magic('cpaste')

        if not should_fail:
            assert ip.user_ns['code_ran'], "%r failed" % code
    finally:
        sys.stdin = original_stdin
def get_all_rows(es_params, request):
    """Yield CSV text for every scroll batch of the query in *es_params*.

    A header row built from ``es_params['features']`` is written first, so it
    is part of the first yielded chunk.  Nothing is yielded when the first
    scroll response has no hits.
    """
    buffer_ = StringIO()
    writer = csv.writer(buffer_)
    # NOTE(review): the header keeps the caller's feature order while the
    # rows are written with the sorted list below -- confirm they line up.
    writer.writerow(list(es_params['features']))

    dataset = Datasets().activate_dataset(request.session)
    es_m = dataset.build_manager(ES_Manager)
    es_m.build(es_params)
    es_m.set_query_parameter('size', ES_SCROLL_BATCH)

    features = sorted(es_params['features'])

    page = es_m.scroll()
    while True:
        scroll_id = page['_scroll_id']
        hits = page['hits']['hits']
        if not hits:
            break

        process_hits(hits, features, write=True, writer=writer)

        # Hand over what has accumulated, then empty the buffer for reuse.
        chunk = buffer_.getvalue()
        buffer_.seek(0)
        buffer_.truncate()
        yield chunk

        page = es_m.scroll(scroll_id=scroll_id)
def get_version_info(url):
    """Return a rewound StringIO listing every pinned version found at *url*.

    One line is written per version entry.  The package name is shown on the
    first entry of each package only.  A package is coloured red when its
    first listed version is not its highest one.
    """
    version_sections = extract_versions_section(url)

    # Column widths: longest package name and longest version string.
    pkg_maxlen = 0
    version_maxlen = 0
    for name, infos in version_sections.items():
        if len(name) > pkg_maxlen:
            pkg_maxlen = len(name)
        for info in infos:
            if len(info.version) > version_maxlen:
                version_maxlen = len(info.version)

    outfmt = ('{{pkg_name:{pkg_maxlen}}} = {{color}}{{version_info.version:{version_maxlen}}}{reset} {{index}} {{version_info.origin}}\n'  # NOQA
              .format(pkg_maxlen=pkg_maxlen, version_maxlen=version_maxlen,
                      reset=colorama.Fore.RESET))

    out_stream = StringIO()
    for name, infos in sorted(version_sections.items()):
        highest = max((parse_version(info.version) for info in infos))
        first = parse_version(infos[0].version)
        color = colorama.Fore.RED if highest != first else colorama.Fore.RESET

        for index, info in enumerate(infos):
            shown_name = '' if index else name
            out_stream.write(outfmt.format(pkg_name=shown_name,
                                           version_info=info,
                                           color=color,
                                           index=index))

    out_stream.seek(0)
    return out_stream
def render_me(element):
    """Render *element* into memory and return the text it produced.

    A utility for checking by eye that an element renders the right thing.
    """
    sink = StringIO()
    element.render(sink)
    sink.seek(0)
    rendered = sink.read()
    return rendered
def _parser_to_string_io(parser):
    """Write *parser* into a new StringIO and return it rewound to the start."""
    stream = StringIO()
    parser.write(stream)
    stream.flush()
    stream.seek(0)
    return stream
def test_reading_successful(self):
    """
    Import one CSV record and check it reaches the data controller.

    This test case acts as the data controller stub passed to CsvImporter.

    Expected:
    - import_data returns self.upload_id
    - self.process_record_call is True
    - self.start_import_called is True
    """
    header = u'date,category,employee name,employee address,expense description,pre-tax amount,tax name,tax amount\n'
    record = u'12/1/2013,Travel,Don Draper,"783 Park Ave, New York, NY 10021",Taxi ride, 350.00 ,NY Sales tax, 31.06\n'

    input_text = StringIO()
    for chunk in (header, record):
        input_text.write(chunk)
    input_text.seek(0)

    # The sniffed dialect is configured here but not passed to the importer.
    dialect = csv.Sniffer().sniff(sample=input_text.getvalue())
    dialect.delimiter = ','
    dialect.quotechar = '"'

    importer = CsvImporter(data_controller=self)
    actual_return = importer.import_data(input_text)

    self.assertEqual(self.upload_id, actual_return)
    self.assertTrue(self.process_record_call)
    self.assertTrue(self.start_import_called)
def download():
    """Run the advanced search from the submitted form and return it as a CSV attachment.

    Returns ``None`` when the form does not validate.
    """
    form = AdvancedSearchForm()
    form.business_type.default = 'All Entities'
    if not form.validate_on_submit():
        return None

    q_object = {
        'query': form.query.data,
        'query_limit': form.query_limit.data,
        'index_field': form.index_field.data,
        'active': form.active.data,
        'sort_by': form.sort_by.data,
        'sort_order': form.sort_order.data
    }
    try:
        q_object['start_date'] = datetime.strftime(form.start_date.data, '%Y-%m-%d')
        q_object['end_date'] = datetime.strftime(form.end_date.data, '%Y-%m-%d')
    except TypeError:
        # Fall back to a fixed start date and the current time.
        q_object['start_date'] = date(year=1990, month=1, day=1)
        q_object['end_date'] = datetime.now()
    q_object['business_type'] = form.business_type.data

    results = query(q_object)

    # Maps each CSV column to the attribute it is read from.
    columns = (
        ('name', 'nm_name'),
        ('id', 'id_bus'),
        ('origin date', 'dt_origin'),
        ('status', 'status'),
        ('type', 'type'),
        ('street', 'street'),
        ('city', 'city'),
        ('state', 'state'),
        ('zip', 'zip'),
    )
    csv_buffer = StringIO()
    writer = csv.DictWriter(csv_buffer,
                            fieldnames=[column for column, _ in columns])
    writer.writeheader()
    for biz in results.all():
        row = {}
        for column, attribute in columns:
            row[column] = getattr(biz, attribute)
        writer.writerow(row)
    csv_buffer.seek(0)

    response = Response(csv_buffer, content_type='text/csv')
    response.headers['Content-Disposition'] = 'attachment; filename=sots_search_results.csv'
    return response
def _get_image_buf(self, path):
    """Fetch *path* through ``self.image_getter`` into a rewound in-memory buffer.

    Returns ``None`` when *path* is falsy.
    """
    # path = next((p.path for p in self.records if p.name == path), None)
    if not path:
        return None

    image_buf = StringIO()
    self.image_getter.get(path, image_buf)
    image_buf.seek(0)
    return image_buf
def test_to_csv_stringio(self):
    """A frame written to an in-memory CSV reads back equal, ignoring names."""
    csv_buffer = StringIO()
    self.frame.to_csv(csv_buffer)
    csv_buffer.seek(0)

    round_tripped = read_csv(csv_buffer, index_col=0)

    # TODO to_csv drops column name
    assert_frame_equal(round_tripped, self.frame, check_names=False)
def run_netstat(self):
    """Run ``netstat -su`` and return the counters of its ``Udp:`` section.

    Each counter becomes a dict with the description under ``'key'`` and the
    leading number, as a string, under ``'value'``.  Lines from ``UdpLite:``
    onwards are ignored until another ``Udp:`` header appears.
    """
    output = StringIO()
    netstat(u'-su', _out=output)
    output.seek(0)

    counters = []
    in_udp_section = False
    for raw in output:
        line = raw.rstrip('\n')
        if line.startswith('Udp:'):
            in_udp_section = True
            continue
        if line.startswith('UdpLite:'):
            in_udp_section = False
        if not in_udp_section:
            continue

        parsed = match(r'^\s*([0-9]+)\b([^$]+)$', line)
        if parsed:
            counters.append({
                'key': parsed.group(2).strip(' '),
                'value': parsed.group(1).strip(' ')
            })
    return counters
def _FunctionDef(self, t):
    """Emit source for the function definition node *t*.

    A function decorated with the bare name ``postgresql_function`` has its
    body unparsed into a string, is created on ``self.engine`` through
    ``CreateFunction``, and is replaced in the output by a stub returning
    ``func.<name>(<args>)``.  Any other function is written out with its
    decorators and body dispatched normally.

    :param t: the function definition node; ``name``, ``args``, ``body``,
        ``returns`` and ``decorator_list`` are read from it.
    """
    self.write("\n")

    # Decorators may be attribute accesses or calls (``@a.b``, ``@f(x)``),
    # which have no ``id``; only plain names can match the marker.
    is_pg_function = any(
        getattr(deco, 'id', None) == 'postgresql_function'
        for deco in t.decorator_list
    )

    if is_pg_function:
        body = StringIO()
        Unparser(t.body, self.engine, body)
        body.seek(0)
        create = CreateFunction(t.name, self._get_type(t.returns),
                                [(arg.arg, self._get_type(arg.annotation))
                                 for arg in t.args.args],
                                body.read())
        self.engine.execute(create)

        self.fill("def " + t.name + "(")
        self.dispatch(t.args)
        self.write(")")
        self.enter()
        self.fill("return func.%s(%s)" % (t.name, ','.join(arg.arg for arg in t.args.args)))
        self.leave()
    else:
        # Normal function
        for deco in t.decorator_list:
            self.fill("@")
            self.dispatch(deco)
        self.fill("def " + t.name + "(")
        self.dispatch(t.args)
        self.write(")")
        self.enter()
        self.dispatch(t.body)
        self.leave()
def runTest(self):
    """Validate the invalid-model module and compare its errors line by line.

    The module named by ``self.model_label`` is loaded, validated, and the
    reported error lines are compared with the module's ``model_errors``.
    Both unexpected and missing lines fail the test.
    """
    from django.core.management.validation import get_validation_errors
    from django.db.models.loading import load_app
    from io import StringIO

    try:
        module = load_app(self.model_label)
    except Exception:
        self.fail('Unable to load invalid model module')

    # Make sure sys.stdout is not a tty so that we get errors without
    # coloring attached (makes matching the results easier). We restore
    # sys.stdout afterwards, even if validation itself raises.
    orig_stdout = sys.stdout
    s = StringIO()
    sys.stdout = s
    try:
        get_validation_errors(s, module)
    finally:
        sys.stdout = orig_stdout

    s.seek(0)
    error_log = s.read()
    actual = error_log.split('\n')
    expected = module.model_errors.split('\n')

    unexpected = [err for err in actual if err not in expected]
    missing = [err for err in expected if err not in actual]

    self.assertTrue(not unexpected, "Unexpected Errors: " + '\n'.join(unexpected))
    self.assertTrue(not missing, "Missing Errors: " + '\n'.join(missing))
def serialize(self, querySet, title, header, exportDescription):
    """Serialize data to XLS and return the rewound output buffer.

    Layout: the rows of *exportDescription*, one empty row, the *header*
    row, then one row per serialized record.

    XXX: TODO: Currently, this is memory-inefficient.
    """
    book = xlwt.Workbook(encoding='utf-8')
    sheet = book.add_sheet(title)

    # Description rows start at the top of the sheet.
    description_rows = 0
    for cells in exportDescription:
        for col_no, cell in enumerate(cells):
            sheet.write(description_rows, col_no, cell)
        description_rows += 1

    # Leave one empty row between the description and the header.
    header_row = description_rows + 1
    for col_no, _ in enumerate(header):
        sheet.write(header_row, col_no, header[col_no])

    records = QuerySetSerializer.serialize(self, querySet)
    for row_no, record in enumerate(records):
        target_row = header_row + row_no + 1
        for col_no, key in enumerate(record.keys()):
            sheet.write(target_row, col_no, record[key])

    # NOTE(review): a text StringIO is used for the workbook output;
    # confirm this matches the StringIO the file imports.
    output = StringIO()
    book.save(output)
    output.seek(0)
    return output
def test_add_one(self):
    """Add a Packet to the Packets worksheet."""
    messages = StringIO()
    workbook = Workbook()
    pws = PacketsWorksheet(workbook.active)
    pws.setup()

    cultivar = Cultivar(name='Foxy')
    cultivar.common_name = CommonName(name='Foxglove')
    cultivar.common_name.index = Index(name='Perennial')

    packet = Packet(sku='8675309', price='3.50')
    packet.quantity = Quantity(value=100, units='seeds')
    packet.cultivar = cultivar

    pws.add_one(packet, stream=messages)

    def cell_value(column):
        # The packet is expected in row 2.
        return pws.cell(2, pws.cols[column]).value

    assert cell_value('Cultivar (JSON)') == json.dumps(cultivar.queryable_dict)
    assert cell_value('SKU') == '8675309'
    assert cell_value('Price') == '3.50'
    assert cell_value('Quantity') == '100'
    assert cell_value('Units') == 'seeds'

    messages.seek(0)
    logged = messages.read()
    assert ('Adding data from <Packet SKU #8675309> to row #2 of packets '
            'worksheet.') in logged
def post(self):
    """Import the uploaded workbook into the database, sheet by sheet.

    The file posted as ``attachmentName`` is saved to disk and opened with
    xlrd.  For every sheet, rows from index 2 onwards are joined with tabs,
    passed through ``self.block`` and, when that returns a truthy line,
    bulk-loaded with ``copy_from`` into the table mapped to the sheet name.

    :returns: the success message after an import, otherwise the failure
        message (no file in the request, or an empty upload).
    """
    if 'attachmentName' not in request.files:
        return '导入失败'
    upload = request.files['attachmentName']
    if not upload:
        return '导入失败'

    filename = secure_filename(upload.name)
    upload.save(filename)
    book = xlrd.open_workbook(filename)

    conn = psycopg2.connect()
    try:
        cur = conn.cursor()
        input_buffer = StringIO()
        n = 0
        for sh in book.sheets():
            for i in range(2, sh.nrows):
                line = '\t'.join([str(x) for x in sh.row_values(i)]) + '\n'
                line = self.block(sh.name, line)
                if line:
                    input_buffer.write(line)
                if n % 50 == 0:
                    # Load the accumulated batch and start a fresh buffer.
                    input_buffer.seek(0)
                    cur.copy_from(input_buffer, table[sh.name])
                    conn.commit()
                    input_buffer = StringIO()
                n += 1
            # Flush what is left for this sheet.  The buffer must be rewound
            # first: copy_from reads from the current position, which is the
            # end of the buffer right after writing.
            input_buffer.seek(0)
            cur.copy_from(input_buffer, table[sh.name])
            input_buffer = StringIO()
            conn.commit()
        cur.execute("select * from ms_infor")
        print(cur.fetchall())
    finally:
        # Release the connection even when the import fails part-way.
        conn.close()
    return '导入成功'
def download_account_tx(self, account_id):
    """Scrape the transaction table of *account_id* and save it as a CSV part.

    Nothing is downloaded when the CSV already exists, and nothing is
    written when the page has no ``account stripe`` table.
    """
    print('+++ Downloading Account transactions of [{}]'.format(account_id))

    csv_path = os.path.join(self.download_parts_dir, '{}.csv'.format(account_id))
    if os.path.exists(csv_path):
        print('!!! Skipped to download! records for [{}] account already downloaded!'.format(account_id))
        return

    response = self.general_session.get(
        self.account_tx_url.format(account_id=account_id))
    soup = BeautifulSoup(response.text, 'html.parser')
    table = soup.find('table', attrs={'class': 'account stripe'})
    if not table:
        print('!!! The account could not be found for [{}]'.format(account_id))
        return

    # Rows are collected in memory first, so the file is only created once
    # every row has been processed.
    data_temp = StringIO()
    csv_columns = ['Parcel_ID', 'Amount', 'Tax_Year', 'Tax_Type', 'Effective_Date', 'Balance']
    writer = csv.DictWriter(data_temp, fieldnames=csv_columns)
    writer.writeheader()

    # The first <tr> is skipped.
    for tr in table.find_all('tr')[1:]:
        cells = tr.find_all('td')
        tax_year = cells[0].text
        tax_type = cells[1].text
        effective_date = cells[2].text
        amount = cells[3].text.replace(',', '').replace('$', '')
        balance = cells[4].text
        raw_row = {
            'Parcel_ID': account_id,
            'Amount': amount,
            'Tax_Year': tax_year,
            'Tax_Type': tax_type,
            'Effective_Date': effective_date,
            'Balance': balance,
        }
        writer.writerow(self._standardize_row(raw_row))

    data_temp.seek(0)
    with open(csv_path, 'w') as csv_file:
        csv_file.write(data_temp.read())
def test_add_one_no_optionals(self):
    """Add a Cultivar to the Cultivars worksheet."""
    messages = StringIO()
    workbook = Workbook()
    cvws = CultivarsWorksheet(workbook.active)
    cvws.setup()

    cultivar = Cultivar(name='Foxy')
    cultivar.common_name = CommonName(name='Foxglove')
    cultivar.common_name.index = Index(name='Perennial')

    cvws.add_one(cultivar, stream=messages)

    def cell_value(column):
        # The cultivar is expected in row 2.
        return cvws.cell(2, cvws.cols[column]).value

    assert cell_value('Index') == 'Perennial'
    assert cell_value('Common Name') == 'Foxglove'
    assert cell_value('Cultivar Name') == 'Foxy'

    # None of the optional columns were set.
    assert cell_value('Section') is None
    assert cell_value('Botanical Name') is None
    assert cell_value('Thumbnail Filename') is None
    assert cell_value('Description') is None
    assert cell_value('Synonyms') is None
    assert cell_value('New Until') is None

    assert cell_value('In Stock') == 'False'
    assert cell_value('Active') == 'False'

    messages.seek(0)
    logged = messages.read()
    assert ('Adding data from <Cultivar "Foxy Foxglove"> to row #2 of '
            'cultivars worksheet.') in logged
def _get_env_variable(self, var_name, default=False):
    """Return a setting from the environment or from the project env file.

    ``os.environ`` is consulted first.  On a miss, the file named by the
    ``PROJECT_ENV_FILE`` environment variable (default: ``.env`` under
    ``self._root_folder_path``) is parsed as a single INI section; keys are
    looked up lower-cased and one pair of surrounding single or double
    quotes is removed from the value.  A value found in the file is also
    stored in ``os.environ``.

    :param var_name: Environment Variable to lookup
    :param default: returned when the variable cannot be found; ``False``
        means "no default" and makes the lookup raise instead.
    :raises ConfigurationError: when the variable is found nowhere and no
        default was given.
    """
    try:
        return os.environ[var_name]
    except KeyError:
        from io import StringIO
        from configparser import ConfigParser

        env_file = os.environ.get('PROJECT_ENV_FILE', self._root_folder_path + "/.env")
        try:
            # The env file has no section header, so one is prepended to
            # make it parseable.  The file is closed as soon as it is read.
            with open(env_file) as env_handle:
                config = StringIO("[DATA]\n" + env_handle.read())

            cp = ConfigParser()
            cp.read_file(config)
            value = dict(cp.items('DATA'))[var_name.lower()]
            if value.startswith('"') and value.endswith('"'):
                value = value[1:-1]
            elif value.startswith("'") and value.endswith("'"):
                value = value[1:-1]
            os.environ.setdefault(var_name, value)
            return value
        except (KeyError, IOError):
            if default is not False:
                return default
            error_msg = "Either set the env variable '{var}' or place it in your " \
                        "{env_file} file as '{var} = VALUE'"
            raise ConfigurationError(error_msg.format(var=var_name, env_file=env_file))
def read_csv_file_from_request(res):
    """Return the text that remove_last_updated_date writes for *res*."""
    csv_buffer = StringIO()
    remove_last_updated_date(res, csv_buffer)
    csv_buffer.seek(0)
    contents = csv_buffer.read()
    return contents
def test_thermal_1(self):
    """tests various thermal cards

    Builds a small model, adds one card of each thermal type through the
    ``model.add_*`` helpers, calls ``raw_fields()`` on most of them, then
    writes the deck in several formats and reads one of them back.
    """
    log = SimpleLogger(level='warning')
    model = BDF(log=log, debug=False)
    model.sol = 101
    lines = [
        'SUBCASE 1',
        ' DISP(PLOT) = ALL',
        ' ANALYSIS = HEAT',
        'BEGIN BULK',
    ]
    model.case_control_deck = CaseControlDeck(lines, log=None)

    # --- geometry: five grid points, one CQUAD4 and one CTETRA ---
    model.add_grid(11, [0., 0., 0.])
    model.add_grid(12, [1., 0., 0.])
    model.add_grid(13, [1., 1., 0.])
    model.add_grid(14, [0., 1., 0.])
    model.add_grid(15, [0., 2., 0.])
    eid = 1
    pid = 1
    mid = 1
    nodes = [11, 12, 13, 14]
    model.add_cquad4(eid, pid, nodes, theta_mcid=0.0, zoffset=0., tflag=0, T1=1.0, T2=1.0, T3=1.0, T4=1.0, comment='')
    model.add_pshell(pid, mid1=1, t=0.1)
    eid = 10
    nids = [11, 12, 13, 15]
    pid = 2
    model.add_ctetra(eid, pid, nids)
    model.add_psolid(pid, mid)
    E = 3.0e7
    G = None
    nu = 0.3
    model.add_mat1(mid, E, G, nu)

    # --- boundary elements ---
    eid = 2
    Type = 'AREA3'
    # This card is built directly (not added to the model); validate() is
    # expected to reject it with ValueError.
    chbdyg = CHBDYG(eid, Type, nodes, iview_front=0, iview_back=0, rad_mid_front=0, rad_mid_back=0, comment='chbdyg')
    with self.assertRaises(ValueError):
        chbdyg.validate()
    Type = 'AREA4'
    chbdyg = model.add_chbdyg(eid, Type, nodes, iview_front=0, iview_back=0, rad_mid_front=0, rad_mid_back=0, comment='chbdyg')
    chbdyg.raw_fields()
    eid = 3
    eid2 = 4
    side = 1
    chbdye = model.add_chbdye(eid, eid2, side, iview_front=0, iview_back=0, rad_mid_front=0, rad_mid_back=0, comment='chbdye')
    chbdye.raw_fields()
    eid = 4
    g1 = 11
    g2 = 12
    pid = 10
    # fails on AREA4 because op2 doesn't support it
    Type = 'LINE'
    chbdyp = model.add_chbdyp(eid, pid, Type, g1, g2, g0=0, gmid=None, ce=0, iview_front=0, iview_back=0, rad_mid_front=0, rad_mid_back=0, e1=None, e2=None, e3=None, comment='chbdyp')
    chbdyp.raw_fields()
    phbdy = model.add_phbdy(pid, af=None, d1=None, d2=None, comment='phbdy')
    phbdy.raw_fields()
    #---------------------------
    # --- convection cards ---
    ta = 2
    ta1 = 2
    pconid = 11
    conv = model.add_conv(eid, pconid, ta, film_node=0, cntrlnd=0, comment='conv')
    conv.raw_fields()
    pconv = model.add_pconv(pconid, mid, form=0, expf=0.0, ftype=0, tid=None, chlen=None, gidin=None, ce=0, e1=None, e2=None, e3=None, comment='pconv')
    pconv.raw_fields()
    pconid = 12
    convm = model.add_convm(eid, pconid, ta1, film_node=0, cntmdot=0, ta2=None, mdot=1.0, comment='convm')
    convm.raw_fields()
    coef = 0.023
    pconvm = model.add_pconvm(pconid, mid, coef, form=0, flag=0, expr=0.0, exppi=0.0, exppo=0.0, comment='pconvm')
    pconvm.raw_fields()

    # --- radiation cards ---
    radmid = 42
    absorb = 0.2
    emissivity = 0.8
    radm = model.add_radm(radmid, absorb, emissivity, comment='radm')
    radm.raw_fields()
    famb = 100.
    nodamb = 33
    eids = [1]
    cntrlnd = 1000
    radbc = model.add_radbc(nodamb, famb, cntrlnd, eids, comment='radbc')
    radbc.raw_fields()

    # --- thermal loads (all share sid) ---
    sid = 43
    qvol = 17.
    control_point = 1001
    elements = [1, 2]
    # NOTE: the name qvol is rebound here from the float value to the card.
    qvol = model.add_qvol(sid, qvol, control_point, elements, comment='qvol')
    qvol.raw_fields()
    q0 = 18.
    t_source = 19.
    eids = [2]
    qvect = model.add_qvect(sid, q0, eids, t_source, ce=0, vector_tableds=None, control_id=0, comment='qvect')
    qvect.raw_fields()
    q0 = 15.8
    flag = 'POINT'
    grids = [-1]
    qhbdy = model.add_qhbdy(sid, flag, q0, grids, af=None, comment='qhbdy')
    qhbdy.raw_fields()
    qflux = 20.
    eids = [1]
    qbdy1 = model.add_qbdy1(sid, qflux, eids, comment='qbdy1')
    qbdy1.raw_fields()
    eid = 1
    qfluxs = 12.
    qbdy2 = model.add_qbdy2(sid, eid, qfluxs, comment='qbdhy2')
    qbdy2.raw_fields()
    q0 = 14.
    cntrlnd = 57
    eids = [1, 2]
    qbdy3 = model.add_qbdy3(sid, q0, cntrlnd, eids, comment='qbdy3')
    qbdy3.raw_fields()

    # --- temperatures: via add_tempd, via a raw field list, and per node ---
    temperature = 13.3
    model.add_tempd(sid, temperature, comment='tempd')
    fields = ['TEMPD', 101, 1., 102, 2., 103, 3., 104, 4.]
    model.add_card(fields, 'TEMPD')
    temperatures = {
        15: 37.,
        16: 38.,
    }
    model.add_temp(sid, temperatures)
    #-------------------------
    # --- write the deck to in-memory buffers, before and after xref ---
    bdf_filename = StringIO()
    bdf_filename2 = StringIO()
    bdf_filename3 = StringIO()
    bdf_filename4 = StringIO()
    model.validate()
    model._verify_bdf(xref=False)
    model.write_bdf(bdf_filename, encoding=None, size=8, is_double=False, interspersed=False, enddata=None, close=False)
    model.cross_reference()
    model.pop_xref_errors()
    model._verify_bdf(xref=True)
    model.write_bdf(bdf_filename2, encoding=None, size=16, is_double=False, interspersed=False, enddata=None, close=False)
    model.write_bdf(bdf_filename3, encoding=None, size=16, is_double=True, interspersed=False, enddata=None, close=False)
    write_bdf_symmetric(model, bdf_filename4, encoding=None, size=8, is_double=False, enddata=None, close=False, plane='xz')
    #model.cross_reference()
    #print(bdf_filename.getvalue())
    # Rewind the size=16 buffer and read it back as a second model.
    bdf_filename2.seek(0)
    model2 = read_bdf(bdf_filename2, xref=False, log=log, debug=False)
    model2.safe_cross_reference()
    save_load_deck(model, punch=False, run_renumber=False, run_test_bdf=False)
# -*- coding: utf-8 -*-
"""Render a line of text with pygame and paste it onto a PIL image."""
import os
from io import BytesIO
from io import StringIO

from PIL import Image, ImageFilter, ImageDraw, ImageFont, ImageEnhance, ImageFilter
import pygame

pygame.init()

text = u"这是一段测试文本,test 123。"

# White canvas that the rendered text is pasted onto.
im = Image.new("RGB", (1024, 755), (255, 255, 255))

font = pygame.font.SysFont("arial", 12)
# dr.text((10, 5), text, font=font, fill="#000000")
# Black, antialiased text on a white background.
rtext = font.render(text, True, (0, 0, 0), (255, 255, 255))
# pygame.image.save(rtext, "t.gif")

# Image data is binary, so it is handed from pygame to PIL through a bytes
# buffer; a text StringIO cannot hold it.
sio = BytesIO()
pygame.image.save(rtext, sio)
sio.seek(0)

line = Image.open(sio)
im.paste(line, (10, 5))
im.show()
im.save("t.png")
class Curl: """ Class to control curl on behalf of the application. """ cookie = None dropcookie = None referer = None headers = None proxy = None ignoreproxy = None tcp_nodelay = None xforw = None xclient = None atype = None acred = None #acert = None retries = 1 delay = 0 followred = 0 fli = None agents = [] # user-agents try: f = open("core/fuzzing/user-agents.txt").readlines( ) # set path for user-agents except: f = open("fuzzing/user-agents.txt").readlines( ) # set path for user-agents when testing for line in f: agents.append(line) agent = random.choice(agents).strip() # set random user-agent def __init__( self, base_url="", fakeheaders=[ 'Accept: image/gif, image/x-bitmap, image/jpeg, image/pjpeg', 'Connection: Keep-Alive', 'Content-type: application/x-www-form-urlencoded; charset=UTF-8' ]): self.handle = pycurl.Curl() self._closed = False self.set_url(base_url) self.verbosity = 0 self.signals = 1 self.payload = "" self.header = StringIO() self.fakeheaders = fakeheaders self.headers = None self.set_option(pycurl.SSL_VERIFYHOST, 0) self.set_option(pycurl.SSL_VERIFYPEER, 0) try: self.set_option( pycurl.SSLVERSION, pycurl.SSLVERSION_TLSv1_2) # max supported version by pycurl except: try: self.set_option(pycurl.SSLVERSION, pycurl.SSLVERSION_TLSv1_1) except: # use vulnerable TLS/SSL versions (TLS1_0 -> weak enc | SSLv2 + SSLv3 -> deprecated) try: self.set_option(pycurl.SSLVERSION, pycurl.SSLVERSION_TLSv1_0) except: try: self.set_option(pycurl.SSLVERSION, pycurl.SSLVERSION_SSLv3) except: self.set_option(pycurl.SSLVERSION, pycurl.SSLVERSION_SSLv2) self.set_option(pycurl.FOLLOWLOCATION, 0) self.set_option(pycurl.MAXREDIRS, 50) # this is 'black magic' self.set_option(pycurl.COOKIEFILE, '/dev/null') self.set_option(pycurl.COOKIEJAR, '/dev/null') self.set_timeout(30) self.set_option(pycurl.NETRC, 1) self.set_nosignals(1) def payload_callback(x): self.payload += str(x) self.set_option(pycurl.WRITEFUNCTION, payload_callback) def header_callback(x): 
self.header.write(str(x)) self.set_option(pycurl.HEADERFUNCTION, header_callback) def set_url(self, url): """ Set the base url. """ self.base_url = url self.set_option(pycurl.URL, self.base_url) return url def set_cookie(self, cookie): """ Set the app cookie. """ self.cookie = cookie self.dropcookie = dropcookie if dropcookie: self.set_option(pycurl.COOKIELIST, 'ALL') self.set_option(pycurl.COOKIE, None) else: self.set_option(pycurl.COOKIELIST, '') self.set_option(pycurl.COOKIE, self.cookie) return cookie def set_agent(self, agent): """ Set the user agent. """ self.agent = agent self.set_option(pycurl.USERAGENT, self.agent) return agent def set_referer(self, referer): """ Set the referer. """ self.referer = referer self.set_option(pycurl.REFERER, self.referer) return referer def set_headers(self, headers): """ Set extra headers. """ self.headers = headers self.headers = self.headers.split("\n") for headerValue in self.headers: header, value = headerValue.split(": ") if header and value: self.set_option(pycurl.HTTPHEADER, (header, value)) return headers def set_proxy(self, ignoreproxy, proxy): """ Set the proxy to use. """ self.proxy = proxy self.ignoreproxy = ignoreproxy if ignoreproxy: self.set_option(pycurl.PROXY, "") else: self.set_option(pycurl.PROXY, self.proxy) return proxy def set_option(self, *args): """ Set the given option. """ self.handle.setopt(*args) def set_verbosity(self, level): """ Set the verbosity level. """ self.set_option(pycurl.VERBOSE, level) def set_nosignals(self, signals="1"): """ Disable signals. curl will be using other means besides signals to timeout """ self.signals = signals self.set_option(pycurl.NOSIGNAL, self.signals) return signals def set_tcp_nodelay(self, tcp_nodelay): """ Set the TCP_NODELAY option. """ self.tcp_nodelay = tcp_nodelay self.set_option(pycurl.TCP_NODELAY, tcp_nodelay) return tcp_nodelay def set_timeout(self, timeout): """ Set timeout for requests. 
""" self.set_option(pycurl.CONNECTTIMEOUT, timeout) self.set_option(pycurl.TIMEOUT, timeout) return timeout def set_follow_redirections(self, followred, fli): """ Set follow locations parameters to follow redirection pages (302) """ self.followred = followred self.fli = fli if followred: self.set_option(pycurl.FOLLOWLOCATION, 1) self.set_option(pycurl.MAXREDIRS, 50) if fli: self.set_option(pycurl.MAXREDIRS, fli) else: self.set_option(pycurl.FOLLOWLOCATION, 0) return followred def do_head_check(self, urls): """ Send a HEAD request before to start to inject to verify stability of the target """ for u in urls: self.set_option(pycurl.URL, u) self.set_option(pycurl.NOBODY, 1) self.set_option(pycurl.FOLLOWLOCATION, 1) self.set_option(pycurl.MAXREDIRS, 50) self.set_option(pycurl.SSL_VERIFYHOST, 0) self.set_option(pycurl.SSL_VERIFYPEER, 0) try: self.set_option(pycurl.SSLVERSION, pycurl.SSLVERSION_TLSv1_2 ) # max supported version by pycurl except: try: self.set_option(pycurl.SSLVERSION, pycurl.SSLVERSION_TLSv1_1) except: # use vulnerable TLS/SSL versions (TLS1_0 -> weak enc | SSLv2 + SSLv3 -> deprecated) try: self.set_option(pycurl.SSLVERSION, pycurl.SSLVERSION_TLSv1_0) except: try: self.set_option(pycurl.SSLVERSION, pycurl.SSLVERSION_SSLv3) except: self.set_option(pycurl.SSLVERSION, pycurl.SSLVERSION_SSLv2) if self.fakeheaders: from core.randomip import RandomIP if self.xforw: generate_random_xforw = RandomIP() xforwip = generate_random_xforw._generateip('') xforwfakevalue = ['X-Forwarded-For: ' + str(xforwip)] if self.xclient: generate_random_xclient = RandomIP() xclientip = generate_random_xclient._generateip('') xclientfakevalue = ['X-Client-IP: ' + str(xclientip)] if self.xforw: self.set_option(pycurl.HTTPHEADER, self.fakeheaders + xforwfakevalue) if self.xclient: self.set_option( pycurl.HTTPHEADER, self.fakeheaders + xforwfakevalue + xclientfakevalue) elif self.xclient: self.set_option(pycurl.HTTPHEADER, self.fakeheaders + xclientfakevalue) if self.headers: 
self.fakeheaders = self.fakeheaders + self.headers self.set_option(pycurl.HTTPHEADER, self.fakeheaders) if self.agent: self.set_option(pycurl.USERAGENT, self.agent) if self.referer: self.set_option(pycurl.REFERER, self.referer) if self.proxy: self.set_option(pycurl.PROXY, self.proxy) if self.ignoreproxy: self.set_option(pycurl.PROXY, "") if self.timeout: self.set_option(pycurl.CONNECTTIMEOUT, self.timeout) self.set_option(pycurl.TIMEOUT, self.timeout) if self.signals: self.set_option(pycurl.NOSIGNAL, self.signals) if self.tcp_nodelay: self.set_option(pycurl.TCP_NODELAY, self.tcp_nodelay) if self.cookie: self.set_option(pycurl.COOKIE, self.cookie) try: self.handle.perform() except: return if str(self.handle.getinfo(pycurl.HTTP_CODE)) in ["302", "301"]: self.set_option(pycurl.FOLLOWLOCATION, 1) def __request(self, relative_url=None): """ Perform a request and returns the payload. """ if self.fakeheaders: from core.randomip import RandomIP if self.xforw: """ Set the X-Forwarded-For to use. """ generate_random_xforw = RandomIP() xforwip = generate_random_xforw._generateip('') xforwfakevalue = ['X-Forwarded-For: ' + str(xforwip)] if self.xclient: """ Set the X-Client-IP to use. 
""" generate_random_xclient = RandomIP() xclientip = generate_random_xclient._generateip('') xclientfakevalue = ['X-Client-IP: ' + str(xclientip)] if self.xforw: self.set_option(pycurl.HTTPHEADER, self.fakeheaders + xforwfakevalue) if self.xclient: self.set_option( pycurl.HTTPHEADER, self.fakeheaders + xforwfakevalue + xclientfakevalue) elif self.xclient: self.set_option(pycurl.HTTPHEADER, self.fakeheaders + xclientfakevalue) if self.headers: self.fakeheaders = self.fakeheaders + self.headers self.set_option(pycurl.HTTPHEADER, self.fakeheaders) if self.agent: self.set_option(pycurl.USERAGENT, self.agent) if self.referer: self.set_option(pycurl.REFERER, self.referer) if self.proxy: self.set_option(pycurl.PROXY, self.proxy) if self.ignoreproxy: self.set_option(pycurl.PROXY, "") if relative_url: self.set_option(pycurl.URL, os.path.join(self.base_url, relative_url)) if self.timeout: self.set_option(pycurl.CONNECTTIMEOUT, self.timeout) self.set_option(pycurl.TIMEOUT, self.timeout) if self.signals: self.set_option(pycurl.NOSIGNAL, self.signals) if self.tcp_nodelay: self.set_option(pycurl.TCP_NODELAY, self.tcp_nodelay) if self.cookie: self.set_option(pycurl.COOKIE, self.cookie) if self.followred: self.set_option(pycurl.FOLLOWLOCATION, 1) self.set_option(pycurl.MAXREDIRS, 50) if self.fli: self.set_option(pycurl.MAXREDIRS, int(self.fli)) else: self.set_option(pycurl.FOLLOWLOCATION, 0) if self.fli: print( "\n[E] You must launch --follow-redirects command to set correctly this redirections limit\n" ) return """ Set the HTTP authentication method: Basic, Digest, GSS, NTLM or Certificate """ if self.atype and self.acred: atypelower = self.atype.lower() if atypelower not in ("basic", "digest", "ntlm", "gss"): print( "\n[E] HTTP authentication type value must be: Basic, Digest, GSS or NTLM\n" ) return acredregexp = re.search("^(.*?)\:(.*?)$", self.acred) if not acredregexp: print( "\n[E] HTTP authentication credentials value must be in format username:password\n" ) return user = 
acredregexp.group(1) password = acredregexp.group(2) self.set_option(pycurl.USERPWD, "%s:%s" % (user, password)) if atypelower == "basic": self.set_option(pycurl.HTTPAUTH, pycurl.HTTPAUTH_BASIC) elif atypelower == "digest": self.set_option(pycurl.HTTPAUTH, pycurl.HTTPAUTH_DIGEST) elif atypelower == "ntlm": self.set_option(pycurl.HTTPAUTH, pycurl.HTTPAUTH_NTLM) elif atypelower == "gss": self.set_option(pycurl.HTTPAUTH, pycurl.HTTPAUTH_GSSNEGOTIATE) else: self.set_option(pycurl.HTTPAUTH, None) self.set_option(pycurl.HTTPHEADER, ["Accept:"]) elif self.atype and not self.acred: print( "\n[E] You specified the HTTP authentication type, but did not provide the credentials\n" ) return elif not self.atype and self.acred: print( "\n[E] You specified the HTTP authentication credentials, but did not provide the type\n" ) return #if self.acert: # acertregexp = re.search("^(.+?),\s*(.+?)$", self.acert) # if not acertregexp: # print "\n[E] HTTP authentication certificate option must be 'key_file,cert_file'\n" # return # # os.path.expanduser for support of paths with ~ # key_file = os.path.expanduser(acertregexp.group(1)) # cert_file = os.path.expanduser(acertregexp.group(2)) # self.set_option(pycurl.SSL_VERIFYHOST, 0) # self.set_option(pycurl.SSL_VERIFYPEER, 1) # self.set_option(pycurl.SSH_PUBLIC_KEYFILE, key_file) # self.set_option(pycurl.CAINFO, cert_file) # self.set_option(pycurl.SSLCERT, cert_file) # self.set_option(pycurl.SSLCERTTYPE, 'p12') # self.set_option(pycurl.SSLCERTPASSWD, '1234') # self.set_option(pycurl.SSLKEY, key_file) # self.set_option(pycurl.SSLKEYPASSWD, '1234') # for file in (key_file, cert_file): # if not os.path.exists(file): # print "\n[E] File '%s' doesn't exist\n" % file # return self.set_option(pycurl.SSL_VERIFYHOST, 0) self.set_option(pycurl.SSL_VERIFYPEER, 0) self.header.seek(0, 0) self.payload = "" for count in range(0, self.retries): time.sleep(self.delay) if self.dropcookie: self.set_option(pycurl.COOKIELIST, 'ALL') nocookie = ['Set-Cookie: ', ''] 
self.set_option(pycurl.HTTPHEADER, self.fakeheaders + nocookie) try: self.handle.perform() except: return return self.payload def get(self, url="", params=None): """ Get a url. """ if params: url += "?" + urllib.parse.urlencode(params) self.set_option(pycurl.HTTPGET, 1) return self.__request(url) def post(self, cgi, params): """ Post a url. """ self.set_option(pycurl.POST, 1) self.set_option(pycurl.POSTFIELDS, params) return self.__request(cgi) def body(self): """ Get the payload from the latest operation. """ return self.payload def info(self): """ Get an info dictionary from the selected url. """ self.header.seek(0, 0) url = self.handle.getinfo(pycurl.EFFECTIVE_URL) if url.startswith('http'): self.header.readline() m = email.message_from_string(str(self.header)) else: m = email.message_from_string(str(StringIO())) #m['effective-url'] = url m['http-code'] = str(self.handle.getinfo(pycurl.HTTP_CODE)) m['total-time'] = str(self.handle.getinfo(pycurl.TOTAL_TIME)) m['namelookup-time'] = str(self.handle.getinfo(pycurl.NAMELOOKUP_TIME)) m['connect-time'] = str(self.handle.getinfo(pycurl.CONNECT_TIME)) #m['pretransfer-time'] = str(self.handle.getinfo(pycurl.PRETRANSFER_TIME)) #m['redirect-time'] = str(self.handle.getinfo(pycurl.REDIRECT_TIME)) #m['redirect-count'] = str(self.handle.getinfo(pycurl.REDIRECT_COUNT)) #m['size-upload'] = str(self.handle.getinfo(pycurl.SIZE_UPLOAD)) #m['size-download'] = str(self.handle.getinfo(pycurl.SIZE_DOWNLOAD)) #m['speed-upload'] = str(self.handle.getinfo(pycurl.SPEED_UPLOAD)) m['header-size'] = str(self.handle.getinfo(pycurl.HEADER_SIZE)) m['request-size'] = str(self.handle.getinfo(pycurl.REQUEST_SIZE)) m['response-code'] = str(self.handle.getinfo(pycurl.RESPONSE_CODE)) m['ssl-verifyresult'] = str( self.handle.getinfo(pycurl.SSL_VERIFYRESULT)) try: m['content-type'] = (self.handle.getinfo(pycurl.CONTENT_TYPE) or '').strip(';') except: m['content-type'] = None m['cookielist'] = str(self.handle.getinfo(pycurl.INFO_COOKIELIST)) 
#m['content-length-download'] = str(self.handle.getinfo(pycurl.CONTENT_LENGTH_DOWNLOAD)) #m['content-length-upload'] = str(self.handle.getinfo(pycurl.CONTENT_LENGTH_UPLOAD)) #m['encoding'] = str(self.handle.getinfo(pycurl.ENCODING)) return m @classmethod def print_options(cls): """ Print selected options. """ print("\nCookie:", cls.cookie) print("User Agent:", cls.agent) print("Referer:", cls.referer) print("Extra Headers:", cls.headers) if cls.xforw == True: print("X-Forwarded-For:", "Random IP") else: print("X-Forwarded-For:", cls.xforw) if cls.xclient == True: print("X-Client-IP:", "Random IP") else: print("X-Client-IP:", cls.xclient) print("Authentication Type:", cls.atype) print("Authentication Credentials:", cls.acred) if cls.ignoreproxy == True: print("Proxy:", "Ignoring system default HTTP proxy") else: print("Proxy:", cls.proxy) print("Timeout:", cls.timeout) if cls.tcp_nodelay == True: print("Delaying:", "TCP_NODELAY activate") else: print("Delaying:", cls.delay, "seconds") if cls.followred == True: print("Follow 302 code:", "active") if cls.fli: print("Limit to follow:", cls.fli) else: print("Delaying:", cls.delay, "seconds") print("Retries:", cls.retries, "\n") def answered(self, check): """ Check for occurence of a string in the payload from the latest operation. """ return self.payload.find(check) >= 0 def close(self): """ Close the curl handle. """ self.handle.close() self.header.close() self._closed = True def __del__(self): if not self._closed: self.close()
""" StringIO => para ler ou escrever dados em arquivos do sistema operacional o software precisa ter permissão. - Permissão de leitura ->> para ler um arquivo - Permissao de escrita ->> para escrever em um arquivo StringIO => utilizado para ler e criar arquivos na memória ->> não será gravado no disco. Requer import do módulo StringIO """ # Fazendo o import do módulo StringIO from io import StringIO mensagem = 'Esta é apenas uma string normal' # Podemos criar um arquivo em memória já com uma string inserida ou mesmo vazio para que possamos depois inserir # alguma informação nele. Para isso basta: # Este arquivo estará disponível para leitura e escrita com a possibilidade de controlar o cursor com seek() arquivo = StringIO( mensagem) # Equivale a ->> arquivo = open('arquivo.txt', 'r', 'w') # Agora que o arquivo foi criado podemos manipluar o arquivo como já aprendido nas aulas anteriores # Arquivo criado e aberto podemos trabalhar print(f'Imprimindo nosso arquivo da memória -> {arquivo.read()}') # Vamos escrever no arquivo arquivo.write( '\nEsta é a nova string feita especialmente para teste de inclusão \n') # Vamos posicionar o cursor para uma nova leitura arquivo.seek(0) # Vamos conferir o resultado de nossa nova inserção no arquivo print(f'Imprimindo com novo conteúdo -> {arquivo.read()}')
class CSVLogger(_Logger):
    """
    Logger producing CSV: each logged URL becomes one line whose fields are
    joined by a separator (a semicolon unless configured otherwise).
    """

    LoggerName = "csv"

    LoggerArgs = {
        "filename": "linkchecker-out.csv",
        'separator': ';',
        "quotechar": '"',
        "dialect": "excel",
    }

    def __init__(self, **kwargs):
        """Keep the CSV formatting options and the OS line terminator."""
        options = self.get_args(kwargs)
        super().__init__(**options)
        self.init_fileoutput(options)
        self.separator = options['separator']
        self.quotechar = options['quotechar']
        self.dialect = options['dialect']
        self.linesep = os.linesep

    def comment(self, s, **args):
        """Emit *s* as a CSV comment line (prefixed with ``# ``)."""
        text = "# %s" % s
        self.writeln(s=text, **args)

    def start_output(self):
        """Write the intro (if enabled), set up the CSV writer and emit
        the header row naming every enabled column."""
        super().start_output()
        if not self.has_part("intro"):
            # An empty write is enough to initialize the file output.
            self.write("")
        else:
            self.write_intro()
            self.flush()
        # Rows are rendered into this in-memory queue first and then
        # forwarded to the real output by writerow().
        self.queue = StringIO()
        self.writer = csv.writer(
            self.queue,
            dialect=self.dialect,
            delimiter=self.separator,
            lineterminator=self.linesep,
            quotechar=self.quotechar,
        )
        header = [name for name in Columns if self.has_part(name)]
        if header:
            self.writerow(header)

    def log_url(self, url_data):
        """Write one CSV row describing the check result in *url_data*."""
        # Each entry: (part name, value producer, omit field when value is None).
        # Producers are lazy so attributes of disabled parts are never read.
        fields = (
            ("urlname", lambda: url_data.base_url, False),
            ("parentname", lambda: url_data.parent_url, False),
            ("base", lambda: url_data.base_ref, False),
            ("result", lambda: url_data.result, False),
            ("warningstring",
             lambda: self.linesep.join(x[1] for x in url_data.warnings),
             False),
            ("infostring", lambda: self.linesep.join(url_data.info), False),
            ("valid", lambda: url_data.valid, False),
            ("url", lambda: url_data.url, False),
            ("line", lambda: url_data.line, True),
            ("column", lambda: url_data.column, True),
            ("name", lambda: url_data.name, False),
            ("dltime", lambda: url_data.dltime, False),
            ("dlsize", lambda: url_data.size, False),
            ("checktime", lambda: url_data.checktime, False),
            ("cached", lambda: 0, False),
            ("level", lambda: url_data.level, False),
            ("modified",
             lambda: self.format_modified(url_data.modified),
             False),
        )
        row = []
        for part, produce, omit_none in fields:
            if not self.has_part(part):
                continue
            value = produce()
            if omit_none and value is None:
                continue
            row.append(value)
        self.writerow(row)
        self.flush()

    def writerow(self, row):
        """Render *row* as CSV and forward the text to the output stream."""
        self.writer.writerow(row)
        # Take whatever the csv writer put into the queue ...
        rendered = self.queue.getvalue()
        try:
            rendered = rendered.decode("utf-8")
        except AttributeError:
            # Already text: there is nothing to decode.
            pass
        # ... hand it to the target stream ...
        self.write(rendered)
        # ... and reset the queue for the next row.
        self.queue.seek(0)
        self.queue.truncate(0)

    def end_output(self, **kwargs):
        """Write the outro (if enabled) and close the file output."""
        if self.has_part("outro"):
            self.write_outro()
        self.close_fileoutput()
def meds(data_buffer, varOut=None, chunksize=100000, log_file=None):
    """Read MEDS data from a text buffer into a chunked pandas reader.

    This reader is MEDS specific: it also applies QC filtering, in which a
    value is kept only when its QC flag is '0', '1' or 'P'.

    Parameters
    ----------
    data_buffer : seekable text file-like object holding the MEDS records.
    varOut : optional sequence of schema index labels to output; when
        omitted or empty the full schema is used.
    chunksize : number of rows per chunk for both the intermediate and
        the returned reader.
    log_file : forwarded to ``setup_log``.

    Returns
    -------
    ``(reader, schema_out)`` where ``reader`` is the object returned by
    ``pd.read_csv(..., chunksize=chunksize)`` over the QC-filtered data and
    ``schema_out`` is the selected part of the schema; ``None`` when the
    schema cannot be loaded or a chunk fails to process (the error is
    logged).
    """
    setup_log(log_file)
    data_buffer.seek(0)
    logging.info('MEDS READER: READING MEDS DATA FROM BUFFER')

    # ------------------------------------------------------------------
    # 1. Load the schema defining the format
    # ------------------------------------------------------------------
    logging.info('Reading MEDS default schema file')
    try:
        schema_df = schema_reader.meds()
    except Exception as e:
        logging.error('Reading MEDS schema: {}'.format(e))
        return None

    # ------------------------------------------------------------------
    # 2. Clean the buffer: the input is a "fake" csv with fixed widths;
    #    unless the padding spaces are removed pandas.read_csv cannot
    #    parse it correctly.
    # ------------------------------------------------------------------
    logging.info('Eliminating buffer spaces')
    tic = time.time()
    output = StringIO()
    # Stream line by line instead of materialising the whole input list.
    for line in data_buffer:
        output.write(line.replace(' ', ''))

    # ------------------------------------------------------------------
    # 3. Select the output parameters and build converters / dtypes
    # ------------------------------------------------------------------
    if varOut is not None and len(varOut) > 0:
        schema_out = schema_df.loc[varOut]
    else:
        schema_out = schema_df
    names = schema_out['names']
    column_types = schema_out['column_type']
    out_conv = {
        name: converters.get(column_type)
        for name, column_type in zip(names, column_types)
    }
    dtypes = dict(zip(names, column_types))

    # ------------------------------------------------------------------
    # 4. Read the cleaned data in chunks
    # ------------------------------------------------------------------
    output.seek(0)
    logging.info('Reading buffer to df: applying conversions where needed')
    # Reading is lazy: a badly formed value (e.g. 'P' instead of an integer)
    # only fails later, when the chunk is consumed and the converter runs,
    # and pandas does not emit the bad-line warning for it.
    df_tfrO = pd.read_csv(
        output,
        header=None,
        names=names,
        usecols=names,
        converters=out_conv,
        chunksize=chunksize,
        error_bad_lines=False,
        warn_bad_lines=True,
    )

    # ------------------------------------------------------------------
    # 5. Enforce schema dtypes, apply QC masking, spool to a temp buffer
    # ------------------------------------------------------------------
    logging.info('Assigning schema data types to df columns: loading to temporal buffer')
    output_L0_buff = StringIO() if py3 else BytesIO()
    ichunk = 0
    qc_column = schema_out['qc_column']
    # Map each value column to its QC column; the self-comparison drops
    # entries whose QC column is NaN (NaN != NaN).
    qced = {
        name: qc_column.loc[name]
        for name in names
        if qc_column.loc[name] == qc_column.loc[name]
    }
    try:
        for raw_df in df_tfrO:
            raw_df = raw_df.astype(dtypes)
            # Replace values whose QC flag is not '0', '1' or 'P' by fmiss.
            # NOTE(review): apply() is called without axis=1 although the
            # lambda indexes a row by column label -- confirm that swifter
            # applies it row-wise here.
            for col, qc_col in qced.items():
                raw_df[col] = raw_df[[col, qc_col]].swifter.apply(
                    lambda row: fmiss if row[qc_col] not in ['0', '1', 'P'] else row[col])
            raw_df.to_csv(output_L0_buff, columns=schema_out.index,
                          mode='a', header=False, encoding='utf-8')
            ichunk += 1
    except Exception as e:
        logging.error("Error reading chunk number {} ".format(ichunk))
        logging.error("{}".format(e))
        return None

    # ------------------------------------------------------------------
    # 6. Hand back a chunked reader over the processed data
    # ------------------------------------------------------------------
    logging.info('Creating TextFileReader object of df')
    output_L0_buff.seek(0)
    outL0_tfr_Obj = pd.read_csv(output_L0_buff, names=schema_out.index,
                                chunksize=chunksize, dtype=dtypes)
    toc = time.time()
    logging.info('Time elapsed "{0:.3f}"'.format(toc - tic))
    return outL0_tfr_Obj, schema_out
def imma1(infile, attmOut='', chunksize=100000, log_file=None):
    """Read an IMMA1 file into a chunked pandas reader.

    Parameters
    ----------
    infile : path of the IMMA1 file (read as UTF-8 text).
    attmOut : list of attachment section names to output; when empty,
        every entry of the general schema index except the first is used.
    chunksize : number of rows per chunk for the intermediate and the
        returned reader.
    log_file : forwarded to ``setup_log``.

    Returns
    -------
    ``(reader, schema_out, output_modulo_buff)``: the object returned by
    ``pd.read_csv(..., chunksize=chunksize)`` over the scaled and type-cast
    data, the schema rows of the output sections (indexed by
    ``<name>_<section>``), and the second buffer returned by
    ``imma1_extract_from_buffer`` (supplementary data, per the original
    comments).

    Raises
    ------
    Any exception raised while scaling or casting a column is logged with
    the column name and re-raised.
    """
    setup_log(log_file)
    logging.info('IMMA1 READER: READING IMMA1 DATA FROM FILE')
    logging.info('Reading file {}'.format(infile))

    # ------------------------------------------------------------------
    # 1. Load the schema defining the format
    # ------------------------------------------------------------------
    schema_general_df, schema_df = schema_reader.imma1()
    # Force known types; potential integers that do not apply to every
    # section (e.g. precision) stay float so they can hold NaN.
    dtypes_general = {'minCount': 'uint8', 'maxCount': 'uint8',
                      'sentinal': 'object', 'length': 'uint16'}
    schema_general_df = schema_general_df.astype(dtype=dtypes_general)

    # ------------------------------------------------------------------
    # 2. Read the whole file into an in-memory text buffer
    # ------------------------------------------------------------------
    tic = time.time()
    with open(infile, 'r', encoding='utf-8') as content_file:
        content_buffer = StringIO(content_file.read())
    toc = time.time()
    logging.info('Time to read data "{0:.3f}"'.format(toc - tic))

    # ------------------------------------------------------------------
    # 3. Read the core section into a dataframe
    # ------------------------------------------------------------------
    core_names = schema_df['names'].loc['core']
    core_conv = dict(zip(
        core_names,
        map(converters.get, schema_df['column_type'].loc['core'])))
    tic = time.time()
    content_buffer.seek(0)
    core_df = pd.read_fwf(
        content_buffer,
        widths=schema_df['field_length'].loc['core'].astype('int16').values,
        header=None,
        names=core_names,
        converters=core_conv,
        encoding='utf-8',
    )
    toc = time.time()
    logging.info('Time to read core "{0:.3f}"'.format(toc - tic))

    # ------------------------------------------------------------------
    # 4. Extract core + requested attachments into one buffer and the
    #    supplementary data into a separate one
    # ------------------------------------------------------------------
    if len(attmOut) == 0:
        # Default: every section but the first index entry
        # (presumably 'core', which is prepended below -- confirm).
        attmOut = list(schema_general_df.index)[1:]
    sectionsOut = ['core'] + attmOut
    output_buff, output_modulo_buff = imma1_extract_from_buffer(
        content_buffer, core_df, schema_general_df, sectionsOut)
    output_buff.seek(0)
    output_modulo_buff.seek(0)

    # ------------------------------------------------------------------
    # 5. Read core and selected attachments from the output buffer
    # ------------------------------------------------------------------
    # Work on a copy so adding the helper column never writes into (or
    # warns about) a slice of schema_df.
    schema_out = schema_df.loc[sectionsOut].copy()
    schema_out['names-section'] = schema_out['names'] + '_' + schema_out.index
    schema_out.set_index('names-section', inplace=True, drop=True)
    out_conv = dict(zip(
        schema_out.index,
        map(converters.get, schema_out['column_type'])))
    tic = time.time()
    output_L0_buff = StringIO() if py3 else BytesIO()
    # Uses the same `pd` alias as every other pandas call in this function.
    outraw_tfr_Obj = pd.read_fwf(
        output_buff,
        widths=schema_out['field_length'].astype('int16').values,
        header=None,
        names=schema_out.index,
        converters=out_conv,
        chunksize=chunksize,
    )

    # ------------------------------------------------------------------
    # 6. Process in chunks (scale floats, cast integers) and spool the
    #    result to a temporary buffer that backs the returned reader
    # ------------------------------------------------------------------
    for raw_df in outraw_tfr_Obj:
        for column in raw_df:
            ctype = schema_out['column_type'].loc[column]
            try:
                if 'float' in ctype:
                    # Scale real values; the missing marker stays untouched.
                    scale = schema_out['scale'].loc[column]
                    raw_df[column] = raw_df[column].swifter.apply(
                        lambda x: fmiss if x == fmiss else x * scale)
                if 'int' in ctype:
                    raw_df[column] = raw_df[column].astype(ctype)
                if 'base36' in ctype:
                    raw_df[column] = raw_df[column].astype('int16')
            except Exception as e:
                logging.error('Column {}'.format(column))
                logging.error('Exception: {}'.format(e))
                raise
        # Writing to text may lose decimal precision of the scaled values.
        raw_df.to_csv(output_L0_buff, columns=schema_out.index,
                      mode='a', header=False, encoding='utf-8')

    # Final dtypes after scaling and base conversion: base36 columns were
    # cast to int16 above, everything else keeps its schema type.
    out_num_dtypes = {
        name: ('int16' if 'base' in column_type else column_type)
        for name, column_type in zip(schema_out.index, schema_out['column_type'])
    }
    output_buff.seek(0)
    output_L0_buff.seek(0)
    outL0_tfr_Obj = pd.read_csv(output_L0_buff, names=schema_out.index,
                                chunksize=chunksize, dtype=out_num_dtypes)
    toc = time.time()
    logging.info('Time to process data to output "{0:.3f}"'.format(toc - tic))
    return outL0_tfr_Obj, schema_out, output_modulo_buff
# NOTE(review): `chr_list` and `newdf` are created earlier in the script,
# outside this fragment; the comments below describe only what is visible.

# Name the single column of chr_list 'chr' and store its values as strings.
chr_list.columns = ['chr']
chr_list["chr"] = chr_list.chr.apply(str)

# Append the chr_list rows to newdf; cells missing after the append become 0.
results = newdf.append(chr_list,
                       ignore_index=True).fillna(0, downcast='integer')
# reset_index() keeps the current row labels in a new 'index' column, which
# is used further down to drop rows.
results = results.reset_index()

# Non-numeric 'chr' values become NaN so the sort is numeric; after sorting,
# every remaining NaN (in any column) is replaced by 'X'.
results.chr = pd.to_numeric(results.chr, errors='coerce')
results = results.sort_values(by=['chr'], ascending=True).fillna('X')

# Collect the labels of rows whose 'cgma' is 0 and whose 'chr' equals the
# 'chr' of the preceding row in the sorted order.
list_of_rows_to_remove = []
for x, row in results.iterrows():
    if results['cgma'][x] == 0:
        if results['chr'][x] == results['chr'].shift(+1)[x]:
            list_of_rows_to_remove.append(x)

# Drop the collected rows (matched through the saved 'index' column).
results = results[~results['index'].isin(list_of_rows_to_remove)]
results = results.reset_index()

# Keep only the output columns, in output order, with integer counts.
results = results[[
    'chr', 'C2Nba', 'C2Nma', 'T2Nba', 'T2Nma', 'mapc2n', 'mapt2n', 'cgba'
]]
results[['C2Nba', 'C2Nma', 'T2Nba',
         'T2Nma']] = results[['C2Nba', 'C2Nma', 'T2Nba', 'T2Nma']].astype(int)
# errors='ignore' leaves the column unchanged when it cannot be converted
# (e.g. when it contains 'X').
results.chr = pd.to_numeric(results.chr, errors='ignore')

# Render the table as tab-separated text (no header, no index) in memory
# and print it to stdout.
from io import StringIO
output = StringIO()
results.to_csv(output, sep="\t", header=False, index=False)
output.seek(0)
print(output.read())
###endscript###
class RPCHandler(asyncore.dispatcher_with_send):
    """
    Handle one client connection.

    Wire format (both directions): a native unsigned int (``struct`` format
    ``"I"``) holding the byte length of the body, followed by the body, a
    UTF-8 encoded JSON object. Requests look like
    ``{"in": <handler name>, "params": ...}``, responses like
    ``{"out": <name>, "result": ...}``.

    The read buffer holds raw bytes: the length prefix is binary data and
    cannot be decoded as UTF-8 text.
    """

    # Pre-compiled length prefix; kept as "I" to stay wire-compatible.
    _HEADER = struct.Struct("I")

    def __init__(self, sock, addr):
        asyncore.dispatcher_with_send.__init__(self, sock=sock)
        self.addr = addr
        self.handlers = {"ping": self.ping}
        # read buffer is maintained by user
        # write buffer is maintained by asyncore
        self.rbuf = bytearray()

    def handle_connect(self):
        """
        Callback when new connection is accepted.
        :return:
        """
        pass

    def handle_close(self):
        """
        Callback before connection is closed.
        :return:
        """
        print(self.addr, "bye")
        self.close()

    def handle_read(self):
        """
        Callback when read event occurs: drain the socket into the read
        buffer, then process every complete message.
        :return:
        """
        while True:
            try:
                content = self.recv(1024)
            except Exception as e:
                # Nothing more can be read right now (or the read failed);
                # stop instead of retrying in a tight loop.
                print(e)
                break
            if content:
                self.rbuf.extend(content)
            if len(content) < 1024:
                # A short read means the socket is drained for now.
                break
        self.handle_rpc()

    def handle_rpc(self):
        """
        Unpack every complete message in the read buffer and dispatch it.

        An incomplete message (partial prefix or partial body) is left in
        the buffer until more data arrives. A message is removed from the
        buffer before it is dispatched, so a failing message is not
        processed again. Raises ``KeyError`` for an unknown handler name
        or a request without ``"in"``/``"params"``.
        :return:
        """
        header_size = self._HEADER.size
        while len(self.rbuf) >= header_size:
            length, = self._HEADER.unpack_from(self.rbuf)
            frame_end = header_size + length
            if len(self.rbuf) < frame_end:
                # half-package: wait for the rest of the body
                break
            body = bytes(self.rbuf[header_size:frame_end])
            # cut the consumed message out of the read buffer
            del self.rbuf[:frame_end]
            request = json.loads(body.decode("utf-8"))
            handler = self.handlers[request["in"]]
            handler(request["params"])

    def ping(self, params):
        """Answer a ping by echoing *params* back as a ``pong`` result."""
        self.send_result("pong", params)

    def send_result(self, out, result):
        """Send ``{"out": out, "result": result}`` as one framed message."""
        payload = json.dumps({"out": out, "result": result}).encode('utf-8')
        # The prefix counts bytes of the encoded body, not characters.
        self.send(self._HEADER.pack(len(payload)))
        self.send(payload)
def to_csv(
    df,
    path_or_buf=None,
    sep=",",
    na_rep="",
    columns=None,
    header=True,
    index=True,
    line_terminator="\n",
    chunksize=None,
    **kwargs,
):
    """{docstring}"""
    # Without a target, write into an in-memory buffer and return its text.
    return_as_string = path_or_buf is None
    if return_as_string:
        path_or_buf = StringIO()

    path_or_buf = ioutils.get_writer_filepath_or_buffer(
        path_or_data=path_or_buf, mode="w", **kwargs)

    # Restrict the frame to the requested columns, if any.
    if columns is not None:
        try:
            df = df[columns]
        except KeyError:
            raise NameError(
                "Dataframe doesn't have the labels provided in columns")

    if sep == "-":
        # TODO: Remove this error once following issue is fixed:
        # https://github.com/rapidsai/cudf/issues/6699
        for col in df._data.columns:
            if isinstance(col, cudf.core.column.DatetimeColumn):
                raise ValueError(
                    "sep cannot be '-' when writing a datetime64 dtype to csv, "
                    "refer to: https://github.com/rapidsai/cudf/issues/6699")

    # TODO: Need to typecast categorical columns to the underlying
    # categories dtype to write the actual data to csv. Remove this
    # workaround once following issue is fixed:
    # https://github.com/rapidsai/cudf/issues/6661
    has_categorical = any(
        isinstance(col, cudf.core.column.CategoricalColumn)
        for col in df._data.columns)
    if has_categorical or isinstance(df.index, cudf.CategoricalIndex):
        # Shallow copy so the caller's frame keeps its categorical columns.
        df = df.copy(deep=False)
        for col_name, col in df._data.items():
            if isinstance(col, cudf.core.column.CategoricalColumn):
                df._data[col_name] = col.astype(col.cat().categories.dtype)
        if isinstance(df.index, cudf.CategoricalIndex):
            df.index = df.index.astype(df.index.categories.dtype)

    # With no (or a zero) chunksize the whole frame is written as one chunk.
    rows_per_chunk = chunksize or len(df)

    # Options shared by both write paths below.
    write_options = dict(
        sep=sep,
        na_rep=na_rep,
        header=header,
        line_terminator=line_terminator,
        rows_per_chunk=rows_per_chunk,
        index=index,
    )

    if not ioutils.is_fsspec_open_file(path_or_buf):
        libcudf.csv.write_csv(df, path_or_buf=path_or_buf, **write_options)
    else:
        # fsspec open-file objects must be entered before they are written to.
        with path_or_buf as file_obj:
            file_obj = ioutils.get_IOBase_writer(file_obj)
            libcudf.csv.write_csv(df, path_or_buf=file_obj, **write_options)

    if return_as_string:
        path_or_buf.seek(0)
        return path_or_buf.read()
async def generate(self, ctx: Context, name: str) -> None:
    """Run the markov generator on the custom text stored under *name*.

    The text is read from the ``config:markov:custom_files`` hash and passed
    to ``gen_markov`` as an in-memory file, with newline handling disabled.
    """
    stored_text = self.config.hget("config:markov:custom_files", name)
    buffer = StringIO()
    buffer.write(stored_text)
    # Rewind so the generator reads from the beginning of the text.
    buffer.seek(0)
    await self.gen_markov(ctx, path=buffer, use_newlines=False)
def export_product_list(self):
    """Export the lines of the active inter-company transfers for download.

    The lines are looked up through the ``active_ids`` of the context.
    Depending on ``self.report_type`` a CSV (``'csv'``) or an Excel sheet
    (``'xls'``) with the columns default code, quantity and price is built,
    base64-encoded and stored in the ``datas`` field of this record.

    Returns the ``ir.actions.act_url`` action that downloads the stored
    file, or ``None`` when ``report_type`` is neither ``'csv'`` nor
    ``'xls'``.
    """
    inter_companytransfer_line_obj = self.env[
        'inter.company.transfer.line.ept']
    line_ids = inter_companytransfer_line_obj.search([
        ('inter_transfer_id', 'in', self.env.context.get('active_ids'))
    ])
    # Name of the first line's transfer, used in the download file name.
    inter_companytransfer_name = line_ids and line_ids[
        0].inter_transfer_id.name or ''

    if self.report_type == 'csv':
        buffer = StringIO()
        field_names = ['default_code', 'qty', 'price']
        csvwriter = DictWriter(buffer, field_names, delimiter=',')
        csvwriter.writeheader()
        for line in line_ids:
            csvwriter.writerow({
                'default_code': line.product_id.default_code or "",
                'qty': line.quantity or 0,
                'price': line.price or 0,
            })
        # base64.encodestring was removed in Python 3.9; encodebytes
        # produces the same output.
        file_data = base64.encodebytes(buffer.getvalue().encode())
        self.write({'datas': file_data})
        return {
            'type': 'ir.actions.act_url',
            'url': 'web/content/?model=import.export.product.list.ept&download=true&field=datas&id=%s&filename=Export_Product_List_%s.csv'
                   % (self.id, inter_companytransfer_name),
            'target': 'new',
        }
    elif self.report_type == 'xls':
        workbook = xlwt.Workbook()
        worksheet = workbook.add_sheet("Normal Sales Data",
                                       cell_overwrite_ok=True)
        worksheet.write(0, 0, 'Default Code')
        worksheet.write(0, 1, 'Qty')
        worksheet.write(0, 2, 'Price')
        # Data rows start below the header row.
        for row, line in enumerate(line_ids, start=1):
            worksheet.write(row, 0, line.product_id.default_code or "")
            worksheet.write(row, 1, line.quantity or 0)
            worksheet.write(row, 2, line.price or 0)
        fp = BytesIO()
        workbook.save(fp)
        report_data_file = base64.encodebytes(fp.getvalue())
        fp.close()
        self.write({'datas': report_data_file})
        return {
            'type': 'ir.actions.act_url',
            'url': 'web/content/?model=import.export.product.list.ept&download=true&field=datas&id=%s&filename=Export_Product_List_%s.xls'
                   % (self.id, inter_companytransfer_name),
            'target': 'new',
        }
def to_string(df, **kwargs):
    """Return the Perseus serialization of *df* as a single string.

    Keyword arguments are passed through to ``df.to_perseus``.
    """
    buffer = StringIO()
    df.to_perseus(buffer, **kwargs)
    # getvalue() yields the whole content regardless of cursor position.
    return buffer.getvalue()
def test_opened_file(self):
    # An already-open file object handed to FileOrPath must come back
    # readable from its start.
    stream = StringIO('test_data')
    with FileOrPath(stream) as handle:
        eq_('test_data', handle.read())
import os
import numpy as np
import re
import pandas as pd
from io import StringIO

# Directory holding the input CSV and the output text file.
DEFAULT_PATH = './../data/'
TEST_PATH = os.path.join(DEFAULT_PATH, 'en_data.csv')

# Re-delimit the CSV in memory: only the first four commas of each line are
# replaced by '|', so any later commas stay part of the last field
# (presumably the utterance text -- confirm against the data).
for_pd = StringIO()
with open(TEST_PATH, encoding="utf8", errors='ignore') as p:
    for line in p:
        new_line = re.sub(r',', '|', line.rstrip(), count=4)
        print(new_line, file=for_pd)

# Rewind and parse the re-delimited text; the first line is the header.
for_pd.seek(0)
test = pd.read_csv(for_pd, sep='|', header=0)

# Drop the metadata columns; the first remaining column is read below.
del test['id']
del test['i_dialog']
del test['i_utterance']
del test['speaker']

with open(os.path.join(DEFAULT_PATH, 'test.txt'), "a") as f:
    for index, row in test.iterrows():
        orig_utterance = row[0]
        # Strip double quotes and replace typographic apostrophes by ASCII ones.
        cleaned_utterance = re.sub(u'"', u"", orig_utterance)
        cleaned_utterance = re.sub(u'’', u"'", cleaned_utterance)
        # NOTE(review): nothing is written to `f` in the visible code; the
        # script may continue beyond this point.
        line = cleaned_utterance
class _XMLTestResult(_TextTestResult):
    """
    A test result class that can express test results in a XML report.

    Used by XMLTestRunner. While a test runs, stdout / stderr are duplicated
    into in-memory buffers so the captured output can be attached to the
    report; the source file, line and docstring of each test are recorded
    when they can be determined.
    """

    def __init__(self, stream=sys.stderr, descriptions=1, verbosity=1,
                 elapsed_times=True, properties=None, infoclass=None):
        _TextTestResult.__init__(self, stream, descriptions, verbosity)
        self._stdout_data = None
        self._stderr_data = None
        self._stdout_capture = StringIO()
        self.__stdout_saved = None
        self._stderr_capture = StringIO()
        self.__stderr_saved = None
        self.successes = []
        self.callback = None
        self.elapsed_times = elapsed_times
        self.properties = properties  # junit testsuite properties
        self.filename = None
        self.lineno = None
        self.doc = None
        if infoclass is None:
            self.infoclass = _TestInfo
        else:
            self.infoclass = infoclass

    def _prepare_callback(self, test_info, target_list, verbose_str,
                          short_str):
        """
        Appends a `infoclass` to the given target list and sets a callback
        method to be called by stopTest method.
        """
        test_info.filename = self.filename
        test_info.lineno = self.lineno
        test_info.doc = self.doc
        target_list.append(test_info)

        def callback():
            """Prints the test method outcome to the stream, as well as
            the elapsed time.
            """
            test_info.test_finished()

            # Ignore the elapsed times for a more reliable unit testing
            if not self.elapsed_times:
                self.start_time = self.stop_time = 0

            if self.showAll:
                self.stream.writeln(
                    '%s (%.3fs)' % (verbose_str, test_info.elapsed_time)
                )
            elif self.dots:
                self.stream.write(short_str)

            self.stream.flush()

        self.callback = callback

    def startTest(self, test):
        """
        Called before execute each test method.
        """
        self.start_time = time()
        TestResult.startTest(self, test)

        try:
            if getattr(test, '_dt_test', None) is not None:
                # doctest.DocTestCase
                self.filename = test._dt_test.filename
                self.lineno = test._dt_test.lineno
            else:
                # regular unittest.TestCase?
                test_method = getattr(test, test._testMethodName)
                test_class = type(test)
                # Note: inspect can get confused with decorators, so use class.
                self.filename = inspect.getsourcefile(test_class)
                # Handle partial and partialmethod objects.
                test_method = getattr(test_method, 'func', test_method)
                _, self.lineno = inspect.getsourcelines(test_method)

                self.doc = test_method.__doc__
        except (AttributeError, IOError, TypeError):
            # issue #188, #189, #195
            # some frameworks can make test method opaque.
            pass

        if self.showAll:
            self.stream.write(' ' + self.getDescription(test))
            self.stream.write(" ... ")
            self.stream.flush()

    def _setupStdout(self):
        """
        Capture stdout / stderr by replacing sys.stdout / sys.stderr
        """
        super(_XMLTestResult, self)._setupStdout()
        self.__stdout_saved = sys.stdout
        sys.stdout = _DuplicateWriter(sys.stdout, self._stdout_capture)
        self.__stderr_saved = sys.stderr
        sys.stderr = _DuplicateWriter(sys.stderr, self._stderr_capture)

    def _restoreStdout(self):
        """
        Stop capturing stdout / stderr and recover sys.stdout / sys.stderr
        """
        if self.__stdout_saved:
            sys.stdout = self.__stdout_saved
            self.__stdout_saved = None
        if self.__stderr_saved:
            sys.stderr = self.__stderr_saved
            self.__stderr_saved = None
        # Empty both capture buffers so the next test starts clean.
        self._stdout_capture.seek(0)
        self._stdout_capture.truncate()
        self._stderr_capture.seek(0)
        self._stderr_capture.truncate()
        super(_XMLTestResult, self)._restoreStdout()

    def _save_output_data(self):
        """Snapshot the output captured so far for the current test."""
        self._stdout_data = self._stdout_capture.getvalue()
        self._stderr_data = self._stderr_capture.getvalue()

    def stopTest(self, test):
        """
        Called after execute each test method.
        """
        self._save_output_data()
        _TextTestResult.stopTest(self, test)
        self.stop_time = time()

        if self.callback and callable(self.callback):
            self.callback()
            self.callback = None

    def addSuccess(self, test):
        """
        Called when a test executes successfully.
        """
        self._save_output_data()
        self._prepare_callback(
            self.infoclass(self, test), self.successes, 'ok', '.'
        )

    @failfast
    def addFailure(self, test, err):
        """
        Called when a test method fails.
        """
        self._save_output_data()
        testinfo = self.infoclass(
            self, test, self.infoclass.FAILURE, err)
        self.failures.append((
            testinfo,
            self._exc_info_to_string(err, test)
        ))
        self._prepare_callback(testinfo, [], 'FAIL', 'F')

    @failfast
    def addError(self, test, err):
        """
        Called when a test method raises an error.
        """
        self._save_output_data()
        testinfo = self.infoclass(
            self, test, self.infoclass.ERROR, err)
        self.errors.append((
            testinfo,
            self._exc_info_to_string(err, test)
        ))
        self._prepare_callback(testinfo, [], 'ERROR', 'E')

    def addSubTest(self, testcase, test, err):
        """
        Called when a subTest method raises an error.
        """
        if err is not None:
            errorText = None
            errorValue = None
            errorList = None
            if issubclass(err[0], test.failureException):
                errorText = 'FAIL'
                errorValue = self.infoclass.FAILURE
                errorList = self.failures
            else:
                errorText = 'ERROR'
                errorValue = self.infoclass.ERROR
                errorList = self.errors

            self._save_output_data()

            testinfo = self.infoclass(
                self, testcase, errorValue, err, subTest=test)
            errorList.append((
                testinfo,
                self._exc_info_to_string(err, testcase)
            ))
            self._prepare_callback(testinfo, [], errorText, errorText[0])

    def addSkip(self, test, reason):
        """
        Called when a test method was skipped.
        """
        self._save_output_data()
        testinfo = self.infoclass(
            self, test, self.infoclass.SKIP, reason)
        testinfo.test_exception_name = 'skip'
        testinfo.test_exception_message = reason
        self.skipped.append((testinfo, reason))
        self._prepare_callback(testinfo, [], 'skip', 's')

    def addExpectedFailure(self, test, err):
        """
        Missing in xmlrunner, copy-pasted from xmlrunner addError.
        """
        self._save_output_data()

        testinfo = self.infoclass(self, test, self.infoclass.SKIP, err)
        testinfo.test_exception_name = 'XFAIL'
        testinfo.test_exception_message = 'expected failure: {}'.format(
            testinfo.test_exception_message)

        self.expectedFailures.append(
            (testinfo, self._exc_info_to_string(err, test)))
        self._prepare_callback(testinfo, [], 'expected failure', 'x')

    @failfast
    def addUnexpectedSuccess(self, test):
        """
        Missing in xmlrunner, copy-pasted from xmlrunner addSuccess.
        """
        self._save_output_data()

        # do not set outcome here because it will need exception
        testinfo = self.infoclass(self, test)
        testinfo.outcome = self.infoclass.ERROR
        # But since we want to have error outcome, we need to provide
        # additional fields:
        testinfo.test_exception_name = 'UnexpectedSuccess'
        testinfo.test_exception_message = (
            'Unexpected success: This test was marked as expected failure '
            'but passed, please review it')

        self.unexpectedSuccesses.append((testinfo, 'unexpected success'))
        self._prepare_callback(testinfo, [], 'unexpected success', 'u')

    def printErrorList(self, flavour, errors):
        """
        Writes information about the FAIL or ERROR to the stream.
        """
        for test_info, dummy in errors:
            self.stream.writeln(self.separator1)
            self.stream.writeln(
                '%s [%.3fs]: %s' % (flavour, test_info.elapsed_time,
                                    test_info.test_description)
            )
            self.stream.writeln(self.separator2)
            self.stream.writeln('%s' % test_info.get_error_info())
            self.stream.flush()

    def _get_info_by_testcase(self):
        """
        Organizes test results by TestCase module. This information is
        used during the report generation, where a XML report will be created
        for each TestCase.
        """
        tests_by_testcase = {}

        for tests in (self.successes, self.failures, self.errors,
                      self.skipped, self.expectedFailures,
                      self.unexpectedSuccesses):
            for test_info in tests:
                if isinstance(test_info, tuple):
                    # This is a skipped, error or a failure test case
                    test_info = test_info[0]
                testcase_name = test_info.test_name
                tests_by_testcase.setdefault(
                    testcase_name, []).append(test_info)

        return tests_by_testcase

    @staticmethod
    def _report_testsuite_properties(xml_testsuite, xml_document, properties):
        """
        Appends a <properties> element with one <property> per item of
        `properties`; does nothing when `properties` is empty or None.
        """
        if properties:
            xml_properties = xml_document.createElement('properties')
            xml_testsuite.appendChild(xml_properties)
            for key, value in properties.items():
                prop = xml_document.createElement('property')
                prop.setAttribute('name', str(key))
                prop.setAttribute('value', str(value))
                xml_properties.appendChild(prop)

    @staticmethod
    def _report_testsuite(suite_name, tests, xml_document, parentElement,
                          properties):
        """
        Appends the testsuite section to the XML document.
        """
        testsuite = xml_document.createElement('testsuite')
        parentElement.appendChild(testsuite)
        module_name = suite_name.rpartition('.')[0]
        file_name = module_name.replace('.', '/') + '.py'

        testsuite.setAttribute('name', suite_name)
        testsuite.setAttribute('tests', str(len(tests)))
        testsuite.setAttribute('file', file_name)

        testsuite.setAttribute(
            'time', '%.3f' % sum(map(lambda e: e.elapsed_time, tests))
        )
        if tests:
            testsuite.setAttribute(
                'timestamp', max(map(lambda e: e.timestamp, tests))
            )
        failures = filter(lambda e: e.outcome == e.FAILURE, tests)
        testsuite.setAttribute('failures', str(len(list(failures))))

        errors = filter(lambda e: e.outcome == e.ERROR, tests)
        testsuite.setAttribute('errors', str(len(list(errors))))

        skips = filter(lambda e: e.outcome == _TestInfo.SKIP, tests)
        testsuite.setAttribute('skipped', str(len(list(skips))))

        _XMLTestResult._report_testsuite_properties(
            testsuite, xml_document, properties)

        for test in tests:
            _XMLTestResult._report_testcase(test, testsuite, xml_document)

        return testsuite

    @staticmethod
    def _test_method_name(test_id):
        """
        Returns the test method name.
        """
        # Trick subtest referencing objects
        subtest_parts = test_id.split(' ')
        test_method_name = subtest_parts[0].split('.')[-1]
        subtest_method_name = [test_method_name] + subtest_parts[1:]
        return ' '.join(subtest_method_name)

    @staticmethod
    def _createCDATAsections(xmldoc, node, text):
        """
        Appends `text` to `node` as CDATA, starting a new section after
        every ']]' so that the terminator ']]>' never appears inside one.
        """
        text = safe_unicode(text)
        pos = text.find(']]>')
        while pos >= 0:
            tmp = text[0:pos+2]
            cdata = xmldoc.createCDATASection(tmp)
            node.appendChild(cdata)
            text = text[pos+2:]
            pos = text.find(']]>')
        cdata = xmldoc.createCDATASection(text)
        node.appendChild(cdata)

    @staticmethod
    def _report_testcase(test_result, xml_testsuite, xml_document):
        """
        Appends a testcase section to the XML document.
        """
        testcase = xml_document.createElement('testcase')
        xml_testsuite.appendChild(testcase)

        class_name = re.sub(r'^__main__.', '', test_result.id())

        # Trick subtest referencing objects
        class_name = class_name.split(' ')[0].rpartition('.')[0]

        testcase.setAttribute('classname', class_name)
        testcase.setAttribute(
            'name', _XMLTestResult._test_method_name(test_result.test_id)
        )
        testcase.setAttribute('time', '%.3f' % test_result.elapsed_time)
        testcase.setAttribute('timestamp', test_result.timestamp)

        if test_result.filename is not None:
            # Try to make filename relative to current directory.
            filename = os.path.relpath(test_result.filename)
            filename = (test_result.filename
                        if filename.startswith('../') else filename)
            testcase.setAttribute('file', filename)

        if test_result.lineno is not None:
            testcase.setAttribute('line', str(test_result.lineno))

        if test_result.doc is not None:
            comment = str(test_result.doc)
            # The use of '--' is forbidden in XML comments (minidom refuses
            # to serialise such a node), so substitute character references.
            comment = comment.replace('--', '&#45;&#45;')
            testcase.appendChild(xml_document.createComment(comment))

        result_elem_name = test_result.OUTCOME_ELEMENTS[test_result.outcome]

        if result_elem_name is not None:
            result_elem = xml_document.createElement(result_elem_name)
            testcase.appendChild(result_elem)

            result_elem.setAttribute(
                'type',
                test_result.test_exception_name
            )
            result_elem.setAttribute(
                'message',
                test_result.test_exception_message
            )
            if test_result.get_error_info():
                error_info = safe_unicode(test_result.get_error_info())
                _XMLTestResult._createCDATAsections(
                    xml_document, result_elem, error_info)

        if test_result.stdout:
            systemout = xml_document.createElement('system-out')
            testcase.appendChild(systemout)
            _XMLTestResult._createCDATAsections(
                xml_document, systemout, test_result.stdout)

        if test_result.stderr:
            systemout = xml_document.createElement('system-err')
            testcase.appendChild(systemout)
            _XMLTestResult._createCDATAsections(
                xml_document, systemout, test_result.stderr)

    def generate_reports(self, test_runner):
        """
        Generates the XML reports to a given XMLTestRunner object.

        When `test_runner.output` is a string it is used as a directory
        and one 'TEST-<suite>.xml' file is written per test case; otherwise
        it is assumed to be a stream that receives a single document.
        """
        from xml.dom.minidom import Document
        all_results = self._get_info_by_testcase()

        outputHandledAsString = \
            isinstance(test_runner.output, str)

        if (outputHandledAsString and not os.path.exists(
                test_runner.output)):
            os.makedirs(test_runner.output)

        if not outputHandledAsString:
            doc = Document()
            testsuite = doc.createElement('testsuites')
            doc.appendChild(testsuite)
            parentElement = testsuite

        for suite, tests in all_results.items():
            if outputHandledAsString:
                doc = Document()
                parentElement = doc

            suite_name = suite
            if test_runner.outsuffix:
                # not checking with 'is not None', empty means no suffix.
                suite_name = '%s-%s' % (suite, test_runner.outsuffix)

            # Build the XML file
            testsuite = _XMLTestResult._report_testsuite(
                suite_name, tests, doc, parentElement, self.properties
            )

            if outputHandledAsString:
                xml_content = doc.toprettyxml(
                    indent='\t',
                    encoding=test_runner.encoding
                )
                filename = os.path.join(
                    test_runner.output,
                    'TEST-%s.xml' % suite_name)
                with open(filename, 'wb') as report_file:
                    report_file.write(xml_content)

                if self.showAll:
                    self.stream.writeln(
                        'Generated XML report: {}'.format(filename))

        if not outputHandledAsString:
            # Assume that test_runner.output is a stream
            xml_content = doc.toprettyxml(
                indent='\t',
                encoding=test_runner.encoding
            )
            test_runner.output.write(xml_content)

    def _exc_info_to_string(self, err, test):
        """Converts a sys.exc_info()-style tuple of values into a string."""
        return super(_XMLTestResult, self)._exc_info_to_string(err, test)
class RuleBasedStateMachine(metaclass=StateMachineMeta):
    """Structured base class for defining rule-driven state machines.

    A state machine keeps its data in named Bundles and exposes a set of
    rules that may read values from bundles (or from ordinary strategies)
    and push results onto bundles. At each step one applicable rule is
    picked at random and executed.
    """

    # Per-class registries, keyed by the concrete state machine class.
    _rules_per_class = {}  # type: Dict[type, List[classmethod]]
    _invariants_per_class = {}  # type: Dict[type, List[classmethod]]
    _base_rules_per_class = {}  # type: Dict[type, List[classmethod]]
    _initializers_per_class = {}  # type: Dict[type, List[classmethod]]
    _base_initializers_per_class = {}  # type: Dict[type, List[classmethod]]

    def __init__(self):
        if not self.rules():
            raise InvalidDefinition(
                f"Type {type(self).__name__} defines no rules")
        self.bundles = {}  # type: Dict[str, list]
        self.name_counter = 1
        self.names_to_values = {}  # type: Dict[str, Any]
        self.__stream = StringIO()
        self.__printer = RepresentationPrinter(self.__stream)
        self._initialize_rules_to_run = copy(self.initialize_rules())
        self._rules_strategy = RuleStrategy(self)

    def _pretty_print(self, value):
        """Render ``value`` for step output; references print as their name."""
        if isinstance(value, VarReference):
            return value.name
        # Reset the shared stream and printer state before each rendering.
        stream = self.__stream
        printer = self.__printer
        stream.seek(0)
        stream.truncate(0)
        printer.output_width = 0
        printer.buffer_width = 0
        printer.buffer.clear()
        printer.pretty(value)
        printer.flush()
        return stream.getvalue()

    def __repr__(self):
        return f"{type(self).__name__}({nicerepr(self.bundles)})"

    def _new_name(self):
        """Hand out the next ``v<N>`` variable name and advance the counter."""
        fresh = f"v{self.name_counter}"
        self.name_counter += 1
        return fresh

    def _last_names(self, n):
        """Return the ``n`` most recently issued variable names, oldest first."""
        assert self.name_counter > n
        upper = self.name_counter
        return [f"v{index}" for index in range(upper - n, upper)]

    def bundle(self, name):
        """Return the list backing bundle ``name``, creating it if needed."""
        return self.bundles.setdefault(name, [])

    @classmethod
    def initialize_rules(cls):
        """Collect (once per class) the rules marked as initializers."""
        cache = cls._initializers_per_class
        if cls in cache:
            return cache[cls]

        for _, member in inspect.getmembers(cls):
            marker = getattr(member, INITIALIZE_RULE_MARKER, None)
            if marker is not None:
                cls.define_initialize_rule(
                    marker.targets, marker.function, marker.arguments,
                    marker.precondition)
        cache[cls] = cls._base_initializers_per_class.pop(cls, [])
        return cache[cls]

    @classmethod
    def rules(cls):
        """Collect (once per class) the rules defined on this class."""
        cache = cls._rules_per_class
        if cls in cache:
            return cache[cls]

        for _, member in inspect.getmembers(cls):
            marker = getattr(member, RULE_MARKER, None)
            if marker is not None:
                cls.define_rule(
                    marker.targets, marker.function, marker.arguments,
                    marker.precondition)
        cache[cls] = cls._base_rules_per_class.pop(cls, [])
        return cache[cls]

    @classmethod
    def invariants(cls):
        """Collect (once per class) the invariants defined on this class."""
        cache = cls._invariants_per_class
        if cls in cache:
            return cache[cls]

        found = []
        for _, member in inspect.getmembers(cls):
            marker = getattr(member, INVARIANT_MARKER, None)
            if marker is not None:
                found.append(marker)
        cache[cls] = found
        return cache[cls]

    @classmethod
    def define_initialize_rule(cls, targets, function, arguments,
                               precondition=None):
        """Register an initializer rule for this class."""
        converted_arguments = dict(arguments.items())
        # Until initialize_rules() has run for this class, rules accumulate
        # in the "base" registry.
        if cls in cls._initializers_per_class:
            target = cls._initializers_per_class[cls]
        else:
            target = cls._base_initializers_per_class.setdefault(cls, [])

        return target.append(
            Rule(targets, function, converted_arguments, precondition))

    @classmethod
    def define_rule(cls, targets, function, arguments, precondition=None):
        """Register a rule for this class."""
        converted_arguments = dict(arguments.items())
        # Until rules() has run for this class, rules accumulate in the
        # "base" registry.
        if cls in cls._rules_per_class:
            target = cls._rules_per_class[cls]
        else:
            target = cls._base_rules_per_class.setdefault(cls, [])

        return target.append(
            Rule(targets, function, converted_arguments, precondition))

    def _print_step(self, rule, data, result):
        """Report the executed step as a line of replayable-looking code."""
        self.step_count = getattr(self, "step_count", 0) + 1
        # A MultipleResults on a step with target bundles is assigned to
        # several variables at once.
        n_output_vars = (
            len(result.values) if isinstance(result, MultipleResults) else 1)
        output_assignment = ""
        if rule.targets and n_output_vars >= 1:
            output_assignment = ", ".join(
                self._last_names(n_output_vars)) + " = "
        rendered_args = ", ".join("%s=%s" % kv for kv in data.items())
        report("{}state.{}({})".format(
            output_assignment, rule.function.__name__, rendered_args))

    def _add_result_to_targets(self, targets, result):
        """Name ``result`` and push a reference to it onto every target."""
        name = self._new_name()

        def print_as_name(obj, p, cycle):
            return p.text(name)

        # Make the printer show this exact object by its variable name.
        self.__printer.singleton_pprinters.setdefault(
            id(result), print_as_name)
        self.names_to_values[name] = result
        for target in targets:
            self.bundles.setdefault(target, []).append(VarReference(name))

    def check_invariants(self):
        """Run every invariant whose precondition (if any) holds."""
        for invar in self.invariants():
            if not invar.precondition or invar.precondition(self):
                invar.function(self)

    def teardown(self):
        """Hook called once a run has finished, for cleaning up state.

        The default implementation does nothing.
        """

    TestCase = TestCaseProperty()

    @classmethod
    @lru_cache()
    def _to_test_case(state_machine_class):
        """Build (and cache) a test case class that runs this machine."""

        class StateMachineTestCase(TestCase):
            settings = Settings(deadline=None,
                                suppress_health_check=HealthCheck.all())

            def runTest(self):
                run_state_machine_as_test(state_machine_class)

            runTest.is_hypothesis_test = True

        machine_name = state_machine_class.__name__
        StateMachineTestCase.__name__ = machine_name + ".TestCase"
        StateMachineTestCase.__qualname__ = qualname(
            state_machine_class) + ".TestCase"
        return StateMachineTestCase
# Converts the string to a file. So that we can use read/write operations easily. page = StringIO(page) page.read(4) # page header: 4 bytes: Always 00000100 # Number of cookies in each page, first 4 bytes after the page header in every page. num_cookies = unpack('<i', page.read(4))[0] cookie_offsets = [] for nc in range(num_cookies): # Every page contains >= one cookie. Fetch cookie starting point from page starting byte cookie_offsets.append(unpack('<i', page.read(4))[0]) page.read(4) # end of page header: Always 00000000 cookie = '' for offset in cookie_offsets: page.seek(offset) # Move the page pointer to the cookie starting point cookiesize = unpack('<i', page.read(4))[0] # fetch cookie size cookie = StringIO(page.read(cookiesize)) # read the complete cookie cookie.read(4) # unknown # Cookie flags: 1=secure, 4=httponly, 5=secure+httponly flags = unpack('<i', cookie.read(4))[0] cookie_flags = '' if flags == 0: cookie_flags = '' elif flags == 1: cookie_flags = 'Secure' elif flags == 4: cookie_flags = 'HttpOnly' elif flags == 5:
class TestDBUtil(DBTestCase): def setUp(self): """Prepare state for test cases""" DBTestCase.setUp(self) self.SCRIPT_FILE = StringIO() self.SCRIPT_FILE.write( "# Create table to test on. Also testing that comment tag is recognized\n" ) self.SCRIPT_FILE.write("\n") self.SCRIPT_FILE.write("create table TestTypes\n") self.SCRIPT_FILE.write("(\n") self.SCRIPT_FILE.write(" TestTypes_id serial,\n") self.SCRIPT_FILE.write( " MyText varchar(50), /* Same as character varying, also test standard SQL comment tags */\n" ) self.SCRIPT_FILE.write(" MyInteger integer,\n") self.SCRIPT_FILE.write( " MyReal real, -- Floating point number, also test standard SQL comment tag\n" ) self.SCRIPT_FILE.write( " MyDateTime TIMESTAMP, -- PostgreSQL uses TIMESTAMP, but MySQL doesn't do NULL values right, so have to use DATETIME for MySQL?\n" ) self.SCRIPT_FILE.write(" MyYesNo boolean\n") self.SCRIPT_FILE.write(");\n") self.SCRIPT_FILE.write( "ALTER TABLE TestTypes ADD CONSTRAINT TestTypes_id PRIMARY KEY (TestTypes_id);\n" ) # Should auto-create testtypes_testtypes_id_seq sequence self.SCRIPT_FILE.write( "CREATE INDEX TestTypes_MyInteger_INDEX ON TestTypes(MyInteger);\n" ) self.SCRIPT_FILE.write("\n") self.SCRIPT_FILE.write( "insert into TestTypes (MyText,MyInteger,MyReal,MyDateTime,MyYesNo)\n" ) self.SCRIPT_FILE.write( "values ('Sample Text', 123,123.45,'2004-09-08 19:41:47.292000',True);\n" ) self.SCRIPT_FILE.write( "insert into TestTypes (MyText,MyInteger,MyReal,MyDateTime,MyYesNo)\n" ) self.SCRIPT_FILE.write( "values ('Joe Mama', 234,23.45,'1990-10-03 19:41:47.292000',False);\n" ) self.SCRIPT_FILE.write( "insert into TestTypes (MyText,MyInteger,MyReal,MyDateTime,MyYesNo)\n" ) self.SCRIPT_FILE.write( "values ('Mo Fo', 345,3.45,'2014-01-04 19:41:47.292000',True);\n" ) self.SCRIPT_FILE.write("\n") self.SCRIPT_FILE = StringIO(self.SCRIPT_FILE.getvalue()) self.DATA_TABLE = "TestTypes" self.DATA_COLS = "MyInteger\tMyReal\tMyYesNo\tMyText\n" self.DATA_FILE = StringIO() 
self.DATA_FILE.write('100\t100.1\tNone\tATest\n') self.DATA_FILE.write('200\t200.2\t' + FALSE_STR + '\tNone\n') self.DATA_FILE.write('200\t200.2\t' + FALSE_STR + '\t\n') # Skip None tag at end of line, test that white space won't get lost self.DATA_FILE.write('300\t\t' + TRUE_STR + '\tCTest\n') self.DATA_FILE = StringIO(self.DATA_FILE.getvalue()) self.DATA_ROWS = [] self.DATA_ROWS.append([ 100, 100.1, None, "ATest", ]) self.DATA_ROWS.append([ 200, 200.2, False, None, ]) self.DATA_ROWS.append([ 300, None, True, "CTest", ]) self.MULTI_LINE_DATA_FILE = StringIO() self.MULTI_LINE_DATA_FILE.write( 'myinteger\t"MyReal"\t"MyYesNo"\tMyText\n') self.MULTI_LINE_DATA_FILE.write( '100\t100.1\tNone\t"""A"" Test and ""more"""\n') self.MULTI_LINE_DATA_FILE.write('200\t200.2\t' + FALSE_STR + '\t""\n') self.MULTI_LINE_DATA_FILE.write('300\tNone\t' + TRUE_STR + '\t"C\\nTest"\t\n') self.MULTI_LINE_DATA_FILE = StringIO( self.MULTI_LINE_DATA_FILE.getvalue()) self.MULTI_LINE_DATA_ROWS = [] self.MULTI_LINE_DATA_ROWS.append([ 100, 100.1, None, '"A" Test and "more"', ]) self.MULTI_LINE_DATA_ROWS.append([ 200, 200.2, False, None, ]) self.MULTI_LINE_DATA_ROWS.append([ 300, None, True, 'C\\nTest', ]) # ID summary data to make it easier to verify stuff self.COL_NAMES = self.DATA_COLS.split() self.ID_COL = self.COL_NAMES[0] self.ID_DATA = [] for row in self.DATA_ROWS: self.ID_DATA.append(row[0]) # Build query to get update rows self.DATA_QUERY = SQLQuery() for col in self.COL_NAMES: self.DATA_QUERY.addSelect(col) self.DATA_QUERY.addFrom(self.DATA_TABLE) self.DATA_QUERY.addWhereIn(self.ID_COL, self.ID_DATA) self.DATA_QUERY.addOrderBy(self.ID_COL) def tearDown(self): """Restore state from any setUp or test steps""" try: DBUtil.execute("drop table TestTypes") pass except Exception as err: log.warning(err) pass DBTestCase.tearDown(self) def test_runDBScript(self): # Just run a DB Script and make sure no ProgrammingErrors are raised. 
DBUtil.runDBScript(self.SCRIPT_FILE, False) # Run some other commands to see if scripts produced expected results results = DBUtil.execute( "select * from TestTypes where MyInteger > %s", (200, )) self.assertEqual(2, len(results)) # Test extra "includeColumnNames" parameter results = DBUtil.execute( "select TestTypes_id,MyText,MyInteger,MyReal,MyDateTime,MyYesNo from TestTypes where MyInteger < %s", (100, ), True) expected = [ "TestTypes_id", "MyText", "MyInteger", "MyReal", "MyDateTime", "MyYesNo" ] for iCol in range(len(expected)): # Ignore case for comparison expected[iCol] = expected[iCol].lower() results[0][iCol] = results[0][iCol].lower() self.assertEqual(expected, results[0]) self.assertEqual(0, len(results) - 1) def test_runDBScript_commandline(self): # Equivalent to test_runDBScript, but try higher level interface # through command-line "main" method origStdin = sys.stdin sys.stdin = self.SCRIPT_FILE argv = ["DBUtil.py", "--script", "-"] DBUtil.main(argv) sys.stdin = origStdin # Run some other commands to see if scripts produced expected results results = DBUtil.execute( "select * from TestTypes where MyInteger > %s", (200, )) self.assertEqual(2, len(results)) results = DBUtil.execute( "select * from TestTypes where MyInteger < %s", (100, )) self.assertEqual(0, len(results)) def test_runDBScript_skipErrors(self): # Similar to test_runDBScript_commandline, but test skipErrors option origStdin = sys.stdin sys.stdin = self.SCRIPT_FILE argv = ["DBUtil.py", "--script", "-"] DBUtil.main(argv) sys.stdin = origStdin # Run script again. Should generate errors from redundant create table, etc. But skip self.SCRIPT_FILE.seek(0) origStdin = sys.stdin sys.stdin = self.SCRIPT_FILE argv = ["DBUtil.py", "--script", "--skipErrors", "-"] DBUtil.main(argv) sys.stdin = origStdin # Run script again. Should generate errors from redundant create table, etc. 
Verify by catch self.SCRIPT_FILE.seek(0) origStdin = sys.stdin sys.stdin = self.SCRIPT_FILE argv = ["DBUtil.py", "--script", "-"] expectErr = True actualErr = False try: DBUtil.main(argv) except Exception as err: actualErr = True self.assertEqual(expectErr, actualErr) sys.stdin = origStdin def test_execute_commandline(self): # Run basic executes for both an update and a select query, but # using the higher-level command-line "main" method interface DBUtil.runDBScript( self.SCRIPT_FILE, False) # Assume this works based on test_runDBScript method origStdout = sys.stdout sys.stdout = StringIO() argv = [ "DBUtil.py", "select count(*) from TestTypes where MyInteger > 200", "-" ] DBUtil.main(argv) self.assertEqual(2, int(sys.stdout.getvalue())) sys.stdout = origStdout origStdout = sys.stdout sys.stdout = StringIO() argv = [ "DBUtil.py", "insert into TestTypes (MyText,MyInteger,MyYesNo) values ('Another',255,True)", "-" ] DBUtil.main(argv) #self.assertEqual( 1, int(sys.stdout.getvalue()) ) sys.stdout = origStdout origStdout = sys.stdout sys.stdout = StringIO() argv = [ "DBUtil.py", "select count(*) from TestTypes where MyInteger > 200", "-" ] DBUtil.main(argv) self.assertEqual(3, int(sys.stdout.getvalue())) sys.stdout = origStdout # Different test, includeColumnNames origStdout = sys.stdout sys.stdout = StringIO() argv = [ "DBUtil.py", "-c", "select TestTypes_id,MyText,MyInteger,MyReal,MyDateTime,MyYesNo from TestTypes where MyInteger > 200 and MyYesNo = True", "-" ] DBUtil.main(argv) sampleLines = sys.stdout.getvalue().split("\n") expected = [ "TestTypes_id", "MyText", "MyInteger", "MyReal", "MyDateTime", "MyYesNo" ] sampleColumns = sampleLines[0].split() for iCol in range(len(expected)): # Case-insensitive comparison expected[iCol] = expected[iCol].lower() sampleColumns[iCol] = sampleColumns[iCol].lower() for iCol, col in enumerate(sampleColumns): self.assertEqual(expected[iCol], col) self.assertEqual( 2 + 1 + 1, len(sampleLines) ) # 2 data lines + 1 column name line + 
1 newline at end of output sys.stdout = origStdout def test_insertFile(self): # Create a test data file to insert, and verify no errors DBUtil.runDBScript( self.SCRIPT_FILE, False) # Assume this works based on test_runDBScript method tableName = "TestTypes" idFile = StringIO() DBUtil.insertFile(self.MULTI_LINE_DATA_FILE, tableName, None, "\t", idFile) # Assume column names extracted from first row of data file # Verify number rows inserted self.assertEqual(len(self.MULTI_LINE_DATA_ROWS), idFile.getvalue().count("\n")) results = DBUtil.execute(self.DATA_QUERY) self.assertEqual(self.MULTI_LINE_DATA_ROWS, results) def test_insertFile_commandline(self): # Similar to test_insertFile, but from higher-level command-line interface DBUtil.runDBScript( self.SCRIPT_FILE, False) # Assume this works based on test_runDBScript method tableName = "TestTypes" columnNames = self.DATA_COLS.split() idFile = StringIO() # Slightly different test, specify tab as delimiter, not just any whitespace origStdin = sys.stdin origStdout = sys.stdout sys.stdin = self.MULTI_LINE_DATA_FILE sys.stdout = idFile argv = ["DBUtil.py", "-i-", "-d\\t", "-t" + tableName, "-o-"] DBUtil.main(argv) sys.stdout = origStdout sys.stdin = origStdin self.assertEqual(3, idFile.getvalue().count("\n")) results = DBUtil.execute(self.DATA_QUERY) self.assertEqual(self.MULTI_LINE_DATA_ROWS, results) def test_insertFile_skipErrors(self): # Similar to test_insertFile_commandline, but just test to see if skipErrors option works # Test run will show errror / warning messages from the app, but these are expected DBUtil.runDBScript( self.SCRIPT_FILE, False) # Assume this works based on test_runDBScript method tableName = "TestTypes" columnNames = ["MyReal", "MyYesNo", "MyText", "MyInteger"] idFile = StringIO() # Try with bogus data that should generate errors dataFile = StringIO() dataFile.write("ABCD\tPositive\tBadTest\t100.123\n") dataFile.write("700.7\t" + FALSE_STR + "\tXTest\t777\n") 
dataFile.write("1,099\tNegative\tMoBadTest\tfoo\n") dataFile = StringIO(dataFile.getvalue()) idFile = StringIO() origStdin = sys.stdin origStdout = sys.stdout sys.stdin = dataFile sys.stdout = idFile argv = ["DBUtil.py", "-i-", "-t" + tableName, "-o-"] argv.extend(columnNames) expectErr = True actualErr = False try: DBUtil.main(argv) except Exception as err: actualErr = True self.assertEqual(expectErr, actualErr) sys.stdout = origStdout sys.stdin = origStdin # Expect no rows succesffuly inserted since errors in input self.assertEqual(0, idFile.getvalue().count("\n")) results = DBUtil.execute( "select count(*) from TestTypes where MyText like %s", ("%Test", )) self.assertEqual(0, results[0][0]) # Try again, with bogus data that should generate errors dataFile = StringIO() dataFile.write("ABCD\tPositive\tBadTest\t100.123\n") dataFile.write("700.7\t" + FALSE_STR + "\tXTest\t777\n") dataFile.write("1,099\tNegative\tMoBadTest\tfoo\n") dataFile = StringIO(dataFile.getvalue()) idFile = StringIO() origStdin = sys.stdin origStdout = sys.stdout sys.stdin = dataFile sys.stdout = idFile argv = ["DBUtil.py", "-i-", "-t" + tableName, "-o-", "-e"] # -e option skipsErrors argv.extend(columnNames) DBUtil.main(argv) sys.stdout = origStdout sys.stdin = origStdin # Still expect 1 row to get through successfuly, despite other invalid input self.assertEqual(1, idFile.getvalue().count("\n")) results = DBUtil.execute( "select count(*) from TestTypes where MyText like %s", ("%Test", )) self.assertEqual(1, results[0][0]) def test_insertFile_dateParsing(self): # Create a test data file to insert, and verify no errors DBUtil.runDBScript( self.SCRIPT_FILE, False) # Assume this works based on test_runDBScript method tableName = "TestTypes" columnNames = ["MyInteger", "MyText", "MyDateTime"] dataFile = StringIO() dataFile.write('''-1\t"12/11/2010"\t"12/11/2010"\n''') dataFile.write( '''-2\t"2013-04-15 13:45:21"\t"2013-04-15 13:45:21"\n''') dataFile.write( '''-3\t"2003-04-15 
10:45:21"\t"2003-04-15 10:45:21"\n''') dataFile.write('''-4\t"4/11/12 6:20"\t"4/11/12 6:20"\n''') dataFile = StringIO(dataFile.getvalue()) dateColFormats = { "myDateTime": None } # Deliberately change capitalization to ensure robustness DBUtil.insertFile(dataFile, tableName, columnNames, dateColFormats=dateColFormats) verifyQuery = \ """select MyInteger, MyText, MyDateTime from TestTypes where MyInteger < 0 order by MyInteger desc """ expectedData = \ [ [ -1, "12/11/2010", datetime(2010,12,11) ], [ -2, "2013-04-15 13:45:21", datetime(2013,4,15,13,45,21) ], [ -3, "2003-04-15 10:45:21", datetime(2003,4,15,10,45,21) ], [ -4, "4/11/12 6:20", datetime(2012,4,11,6,20) ], ] # Verify rows inserted with properly parsed dates results = DBUtil.execute(verifyQuery) self.assertEqual(expectedData, results) def test_insertFile_escapeStrings(self): # Create a test data file to insert, and verify no errors DBUtil.runDBScript( self.SCRIPT_FILE, False) # Assume this works based on test_runDBScript method tableName = "TestTypes" columnNames = ["MyInteger", "MyText"] dataFile = StringIO() dataFile.write('''-1\t"A"\n''') dataFile.write('''-2\t"B\xaeb"\n''') dataFile.write('''-3\t"C"\n''') dataFile.write('''-4\tD\n''') dataFile = StringIO(dataFile.getvalue()) DBUtil.insertFile(dataFile, tableName, columnNames, escapeStrings=True) verifyQuery = \ """select MyInteger, MyText from TestTypes where MyInteger < 0 order by MyInteger desc """ expectedData = \ [ [ -1, "A"], [ -2, "B\\xaeb"], [ -3, "C"], [ -4, "D"], ] # Verify rows inserted with properly parsed dates results = DBUtil.execute(verifyQuery) self.assertEqual(expectedData, results) def test_identityQuery(self): DBUtil.runDBScript(self.SCRIPT_FILE, False) # Run some other commands to see if scripts produced expected results results = DBUtil.execute("select max(TestTypes_id) from TestTypes") lastSeq = results[0][0] conn = DBUtil.connection() try: cur = conn.cursor() cur.execute( "insert into TestTypes (MyText,MyInteger,MyYesNo) values 
('Another',255,True)" ) cur.execute(DBUtil.identityQuery("TestTypes")) self.assertEqual(lastSeq + 1, cur.fetchone()[0]) cur.execute( "select TestTypes_id from TestTypes where MyText = 'Another' and MyInteger = 255" ) self.assertEqual(lastSeq + 1, cur.fetchone()[0]) finally: conn.close() def test_nullCheck(self): DBUtil.runDBScript(self.SCRIPT_FILE, False) conn = DBUtil.connection() try: DBUtil.execute( "insert into TestTypes (MyText,MyInteger) values ('Test With Null', 255)", conn=conn) DBUtil.execute( "insert into TestTypes (MyText,MyInteger,MyReal,MyDateTime) values ('Test With Not Null', 255, 1.23, '2005-03-06')", conn=conn) result = DBUtil.execute( "select MyText from TestTypes where MyInteger = 255 and MyReal is null", conn=conn) self.assertEqual('Test With Null', result[0][0]) result = DBUtil.execute( "select MyText from TestTypes where MyInteger = 255 and MyReal is not null", conn=conn) self.assertEqual('Test With Not Null', result[0][0]) # Would not work with MySQL if used TIMESTAMP data type. Should be DATETIME. (TIMESTAMP tries to auto-fill values, so no nulls allowed?) 
result = DBUtil.execute( "select MyText from TestTypes where MyInteger = 255 and MyDateTime is null", conn=conn) self.assertEqual('Test With Null', result[0][0]) result = DBUtil.execute( "select MyText from TestTypes where MyInteger = 255 and MyDateTime is not null", conn=conn) self.assertEqual('Test With Not Null', result[0][0]) finally: conn.close() def test_findOrInsertItem(self): DBUtil.runDBScript(self.SCRIPT_FILE, False) searchDict = {} insertDict = {} searchDict["TestTypes_id"] = +123 log.debug("Insert a new item using default params") (data, isNew) = DBUtil.findOrInsertItem("TestTypes", searchDict) self.assertEqual(+123, data) self.assertEqual(True, isNew) log.debug("Find the existing item") (data, isNew) = DBUtil.findOrInsertItem("TestTypes", searchDict) self.assertEqual(+123, data) self.assertEqual(False, isNew) insertDict["TestTypes_id"] = +456 log.debug("Find existing item, with optional insert data") (data, isNew) = DBUtil.findOrInsertItem("TestTypes", searchDict, insertDict) self.assertEqual(+123, data) self.assertEqual(False, isNew) searchDict["TestTypes_id"] = +789 insertDict["TestTypes_id"] = +789 insertDict["MyInteger"] = 123 log.debug("Insert a new item with actual data") (data, isNew) = DBUtil.findOrInsertItem("TestTypes", searchDict, insertDict) self.assertEqual(+789, data) self.assertEqual(True, isNew) searchDict["TestTypes_id"] = +234 insertDict["TestTypes_id"] = +234 log.debug("Retrieve a different column") (data, isNew) = DBUtil.findOrInsertItem("TestTypes", searchDict, insertDict, retrieveCol="MyText") self.assertEqual(None, data) self.assertEqual(True, isNew) searchDict["TestTypes_id"] = +345 insertDict["TestTypes_id"] = +345 insertDict["MyText"] = "testText" log.debug("Insert and retrieve a different column") (data, isNew) = DBUtil.findOrInsertItem("TestTypes", searchDict, insertDict, retrieveCol="MyText") self.assertEqual("testText", data) self.assertEqual(True, isNew) insertDict["MyText"] = "newText" log.debug( "Try inserting a 
different value under an existing row. Should NOT work" ) (data, isNew) = DBUtil.findOrInsertItem("TestTypes", searchDict, insertDict, retrieveCol="MyText") self.assertEqual("testText", data) self.assertEqual(False, isNew) log.debug( "Try inserting a different value under an existing row, but force the update" ) insertDict["MyText"] = "newText" (data, isNew) = DBUtil.findOrInsertItem("TestTypes", searchDict, insertDict, retrieveCol="MyText", forceUpdate=True) self.assertEqual("newText", data) self.assertEqual(False, isNew) def test_updateFromFile(self): # Create a test data file to insert, and verify no errors DBUtil.runDBScript( self.SCRIPT_FILE, False) # Assume this works based on test_runDBScript method # Insert some blank data first to update for idValue in self.ID_DATA: DBUtil.execute( "insert into TestTypes (" + self.ID_COL + ") values (%s)", (idValue, )) # Negative test case results = DBUtil.execute(self.DATA_QUERY) self.assertNotEqual(self.DATA_ROWS, results) # Now do the actual update from the file DBUtil.updateFromFile(self.DATA_FILE, self.DATA_TABLE, self.COL_NAMES, delim="\t") results = DBUtil.execute(self.DATA_QUERY) self.assertEqual(self.DATA_ROWS, results) def test_updateFromFile_commandline(self): # Similar to test_updateFromFile, but from higher-level command-line interface DBUtil.runDBScript( self.SCRIPT_FILE, False) # Assume this works based on test_runDBScript method # Insert some blank data first to update for idValue in self.ID_DATA: DBUtil.execute( "insert into TestTypes (" + self.ID_COL + ") values (%s)", (idValue, )) # Negative test case results = DBUtil.execute(self.DATA_QUERY) self.assertNotEqual(self.DATA_ROWS, results) # Now do the actual update from the file, but build in column names to data file dataFileWithCols = StringIO() dataFileWithCols.write(self.DATA_COLS) dataFileWithCols.write(self.DATA_FILE.getvalue()) dataFileWithCols = StringIO(dataFileWithCols.getvalue()) sys.stdin = dataFileWithCols argv = ["DBUtil.py", "-u-", "-t" + 
self.DATA_TABLE, "-d\\t"] DBUtil.main(argv) # Verify positive results results = DBUtil.execute(self.DATA_QUERY) self.assertEqual(self.DATA_ROWS, results) ######################################################## # Repeat test but data file will use more than one key column (adding MyText) # Further note that MyText is used as both a key column to look up the row to update # and as a value column to modify dataFileWithCols = StringIO() dataFileWithCols.write("MyInteger\tMyText\tMyText\tMyReal\tMyYesNo\n") dataFileWithCols.write("100\tATest\tAAA\tNone\t" + TRUE_STR + "\t\n") dataFileWithCols.write("200\tNone\tBBB\t222.2\tNone\t\n") dataFileWithCols.write("300\tCTest\tNone\t333.3\t" + TRUE_STR + "\t\n") dataFileWithCols = StringIO(dataFileWithCols.getvalue()) # Expected results after this update self.DATA_ROWS = [] self.DATA_ROWS.append([ 100, None, True, "AAA", ]) self.DATA_ROWS.append([ 200, 200.2, False, None, ]) # This row is unchanged, because one of the key values cannot be found as null self.DATA_ROWS.append([ 300, 333.3, True, None, ]) # Negative test case results = DBUtil.execute(self.DATA_QUERY) self.assertNotEqual(self.DATA_ROWS, results) # Now do the actual update from the file, but with an extra parameter specifying 2 key columns sys.stdin = dataFileWithCols argv = ["DBUtil.py", "-u-", "-t" + self.DATA_TABLE, "-n2"] DBUtil.main(argv) # Verify positive results results = DBUtil.execute(self.DATA_QUERY) self.assertEqual(self.DATA_ROWS, results) def test_deleteRows(self): DBUtil.runDBScript(self.SCRIPT_FILE, False) query = "select count(*) from TestTypes;" # Insert some test data to delete tableName = "TestTypes" columnNames = self.DATA_COLS.split() idFile = StringIO() DBUtil.insertFile(self.DATA_FILE, tableName, columnNames, None, idFile) idValues = idFile.getvalue().split() # Count up rows before and after delete initialCount = DBUtil.execute(query)[0][0] DBUtil.deleteRows("TestTypes", idValues) afterCount = DBUtil.execute(query)[0][0] 
self.assertEqual(initialCount - len(idValues), afterCount) # Reinsert the test data to try deleting them by a non-default Id column idFile = StringIO() DBUtil.insertFile(self.DATA_FILE, tableName, columnNames, None, idFile) nonDefaultIds = [100, 200] initialCount = DBUtil.execute(query)[0][0] DBUtil.deleteRows("TestTypes", nonDefaultIds, "MyInteger") afterCount = DBUtil.execute(query)[0][0]
def receive(self):
    """Read one pending HTTP response and convert its BOSH body to stream data.

    The next ``(response, request_data)`` pair is taken from
    ``self._respobjs``; the response is started with ``res.begin()`` and
    handled according to its HTTP status.

    Returns:
        The value produced by ``self.bosh_to_xmlstream`` for the received
        body, or the raw bytes read from the socket when the request had to
        be resent after a dropped connection.

    Raises:
        BadStatusLine: re-raised when the server sent data that is a
            genuinely malformed status line.
        Exception: for BAD_REQUEST, FORBIDDEN, NOTFOUND and any other
            non-OK status.
        IOError: when the payload is not a ``body`` element or the
            connection manager terminated the stream.
    """
    # Local import: the gzip payload is binary, so it needs a bytes buffer.
    from io import BytesIO

    resp = ''
    if self.PIPELINE:
        res, data = self._respobjs.pop(0)
    else:
        # NOTE(review): `sock` is not bound anywhere in this method; unless a
        # module-level `sock` exists this raises NameError. Confirm which
        # socket/key is meant here (and in the recv() call below).
        res, data = self._respobjs.pop(sock)

    try:
        res.begin()
    except BadStatusLine:
        resp = sock.recv(1024)
        if len(resp) == 0:
            # The TCP connection has been dropped: resend the request.
            self.refreshpipeline()
            node = Node(node=data)
            self.Rid = node.getAttr('rid')
            self.send(data)
            return resp
        # The server sent some data, so it was a legitimately bad status
        # line: let the caller see the original error.
        raise

    if res.status == OK:
        # Response to a valid client request.
        headers = dict(res.getheaders())
        if headers.get('content-encoding', None) == 'gzip':
            # Compressed bodies are bytes; decompress through a bytes buffer.
            gz = gzip.GzipFile(fileobj=BytesIO(res.read()))
            data = gz.read()
        else:
            data = res.read()
        self.DEBUG(data, 'got')
    elif res.status == BAD_REQUEST:
        # The format of an HTTP header or binding element is unacceptable.
        # The 'error' severity belongs to DEBUG (as in the other branches),
        # not to the exception arguments.
        self.DEBUG("The server did not undertand the request", 'error')
        raise Exception("Disconnected from server")
    elif res.status == FORBIDDEN:
        # The client has broken the session rules (polling too frequently,
        # requesting too frequently, too many simultaneous requests).
        self.DEBUG("Forbidden due to policy-violation", 'error')
        raise Exception("Disconnected from server")
    elif res.status == NOTFOUND:
        # (1) 'sid' is not valid, (2) 'stream' is not valid, (3) 'rid' is
        # larger than the upper limit of the expected window, (4) the
        # connection manager is unable to resend the response, or (5) the
        # 'key' sequence is invalid.
        self.DEBUG("Invalid/Corrupt Stream", 'error')
        raise Exception("Disconnected from server")
    else:
        msg = "Recieved status not defined in XEP-1204: %s" % res.status
        self.DEBUG(msg, 'error')
        raise Exception("Disconnected from server")

    node = Node(node=data)
    if node.getName() != 'body':
        self.DEBUG("The server sent an invalid BOSH payload", 'error')
        raise IOError("Disconnected from server")
    if node.getAttr('type') == 'terminate':
        msg = "Connection manager terminated stream: %s" % (
            node.getAttr('condition'))
        self.DEBUG(msg, 'info')
        raise IOError("Disconnected from server")

    resp = self.bosh_to_xmlstream(node)
    if resp:
        self._owner.Dispatcher.Event('', DATA_RECEIVED, resp)
    else:
        # Nothing usable came back: send the request data again.
        self.send(data)
    return resp
def main(argv=None, return_report=False, regroup=False):
    """Match paths under an input folder with a regex, report, and optionally apply.

    Args:
        argv: list of command-line arguments, a single string (split with
            ``shlex``), or None to use ``sys.argv[1:]``.
        return_report: when True, return the matched files (and conflict
            flags) instead of only an exit code; also skips the interactive
            confirmation and the opening of the report in an editor.
        regroup: when True, additionally build a tree of the matched files
            keyed by the regex groups (forced on by ``--tree``).

    Returns:
        * ``1`` if a regular expression fails to compile.
        * In ``--test`` mode: ``(files_list, None)`` if ``return_report``,
          else ``0``.
        * Otherwise, if ``return_report``: ``(files, [conflict1, conflict2])``
          where ``files`` is the regrouped tree when ``regroup`` is set and
          the flat ``[input, output]`` list if not; else ``0``.

    Raises:
        Exception: if ``argv`` is empty or contains ``--gui``.
        NameError: if the input path is missing or does not exist.
        ValueError: if several modes are combined, a mode needing
            ``--output`` lacks it, or ``--range`` is malformed.
    """
    if argv is None:  # if argv is empty, fetch from the commandline
        argv = sys.argv[1:]
    elif isinstance(argv, _str):
        # argv was supplied as a simple string: split it like a shell would
        # before handing it to the argument parser.
        argv = shlex.split(argv)

    # If --gui was specified, then there's a problem
    if len(argv) == 0 or '--gui' in argv:  # pragma: no cover
        raise Exception('--gui specified but an error happened with lib/gooey, cannot load the GUI (however you can still use this script in commandline). Check that lib/gooey exists and that you have wxpython installed. Here is the error: ')

    #==== COMMANDLINE PARSER ====

    #== Commandline description
    # NOTE: '\\.' below keeps the literal backslash-dot of the example regex
    # without relying on an invalid '\.' escape sequence.
    desc = '''Regex Path Matcher v%s
Description: Match paths using regular expression, and then generate a report. Can also substitute using regex to generate output paths. A copy mode is also provided to allow the copy of files from input to output paths.
This app is essentially a path matcher using regexp, and it then rewrites the path using regexp, so that you can reuse elements from input path to build the output path.
This is very useful to reorganize folders for experiments, where scripts/softwares expect a specific directories layout in order to work.

Advices
-------
- Filepath comparison: Paths are compared against filepaths, not just folders (but of course you can match folders with regex, but remember when designing your regexp that it will compared against files paths, not directories).
- Relative filepath: Paths are relative to the rootpath (except if --show-fullpath) and that they are always unix style, even on Windows (for consistency on all platforms and to easily reuse regexp).
- Partial matching: partial matching regex is accepted, so you don't need to model the full filepath, only the part you need (eg, 'myfile' will match '/myfolder/sub/myfile-034.mat').
- Unix filepaths: on all platforms, including Windows, paths will be in unix format (except if you set --show_fullpath). It makes things simpler for you to make crossplatform regex patterns.
- Use [^/]+ to match any file/folder in the filepath: because paths are always unix-like, you can use [^/]+ to match any part of the filepath. Eg, "([^/]+)/([^/]+)/data/mprage/.+\\.(img|hdr|txt)" will match "UWS/John_Doe/data/mprage/12345_t1_mprage_98782.hdr".
- Split your big task in several smaller, simpler subtasks: instead of trying to do a regex that match T1, T2, DTI, everything at the same time, try to focus on only one modality at a time and execute them using multiple regex queries: eg, move first structural images, then functional images, then dti, etc. instead of all at once.
- Python module: this library can be used as a Python module to include in your scripts (just call `main(return_report=True)`).

Note: use --gui (without any other argument) to launch the experimental gui (needs Gooey library).

In addition to the switches provided below, using this program as a Python module also provides 2 additional options:
 - return_report = True to return as a variable the files matched and the report instead of saving in a file.
 - regroup = True will return the matched files (if return_report=True) in a tree structure of nested list/dicts depending on if the groups are named or not. Groups can also avoid being matched by using non-matching groups in regex.
    ''' % __version__
    ep = ''' '''

    #== Commandline arguments
    #-- Constructing the parser
    # Use GooeyParser if we want the GUI because it will provide better widgets
    # NOTE(review): the guard above raises for exactly this condition, so this
    # Gooey branch looks unreachable as written - confirm the intent.
    if (len(argv) == 0 or '--gui' in argv) and not '--ignore-gooey' in argv:  # pragma: no cover
        # Initialize the Gooey parser
        main_parser = gooey.GooeyParser(add_help=True, description=desc, epilog=ep, formatter_class=argparse.RawTextHelpFormatter)
        # Define Gooey widget types explicitly (because type auto-detection doesn't work quite well)
        widget_dir = {"widget": "DirChooser"}
        widget_filesave = {"widget": "FileSaver"}
        widget_file = {"widget": "FileChooser"}
        widget_text = {"widget": "TextField"}
    else:  # Else in command-line usage, use the standard argparse
        # Delete the special argument to avoid unrecognized argument error in argparse
        if len(argv) > 0 and '--ignore-gooey' in argv[0]:
            argv.remove('--ignore-gooey')  # this argument is automatically fed by Gooey when the user clicks on Start
        # Initialize the normal argparse parser
        main_parser = argparse.ArgumentParser(add_help=True, description=desc, epilog=ep, formatter_class=argparse.RawTextHelpFormatter)
        # Define dummy dict to keep compatibile with command-line usage
        widget_dir = {}
        widget_filesave = {}
        widget_file = {}
        widget_text = {}

    # Required arguments
    main_parser.add_argument('-i', '--input', metavar='/some/path', type=str, required=True,
                             help='Path to the input folder', **widget_dir)
    main_parser.add_argument('-ri', '--regex_input', metavar=r'"sub[^/]+/(\d+)"', type=str, required=True,
                             help=r'Regex to match input paths. Must be defined relatively from --input folder. Do not forget to enclose it in double quotes (and not single)! To match any directory, use [^/\]*? or the alias \dir, or \dirnodot if you want to match folders in combination with --dir switch.', **widget_text)

    # Optional output/copy mode
    main_parser.add_argument('-o', '--output', metavar='/new/path', type=str, required=False, default=None,
                             help='Path to the output folder (where file will get copied over if --copy)', **widget_dir)
    main_parser.add_argument('-ro', '--regex_output', metavar=r'"newsub/\1"', type=str, required=False, default=None,
                             help='Regex to substitute input paths to convert to output paths. Must be defined relatively from --output folder. If not provided but --output is specified, will keep the same directory layout as input (useful to extract specific files without changing layout). Do not forget to enclose it in double quotes!', **widget_text)
    main_parser.add_argument('-c', '--copy', action='store_true', required=False, default=False,
                             help='Copy the matched input paths to the regex-substituted output paths.')
    main_parser.add_argument('-s', '--symlink', action='store_true', required=False, default=False,
                             help='Copy with a symbolic/soft link the matched input paths to the regex-substituted output paths (works only on Linux).')
    main_parser.add_argument('-m', '--move', action='store_true', required=False, default=False,
                             help='Move the matched input paths to the regex-substituted output paths.')
    main_parser.add_argument('--move_fast', action='store_true', required=False, default=False,
                             help='Move the matched input paths to the regex-substituted output paths, without checking first that the copy was done correctly.')
    main_parser.add_argument('-d', '--delete', action='store_true', required=False, default=False,
                             help='Delete the matched files.')

    # Optional general arguments
    main_parser.add_argument('-t', '--test', action='store_true', required=False, default=False,
                             help='Regex test mode: Stop after the first matched file and show the result of substitution. Useful to quickly check if the regex patterns are ok.')
    main_parser.add_argument('--dir', action='store_true', required=False, default=False,
                             help='Match directories too? (else only files are matched)')
    main_parser.add_argument('-y', '--yes', action='store_true', required=False, default=False,
                             help='Automatically accept the simulation and apply changes (good for batch processing and command chaining).')
    main_parser.add_argument('-f', '--force', action='store_true', required=False, default=False,
                             help='Force overwriting the target path already exists. Note that by default, if a file already exist, without this option, it won\'t get overwritten and no message will be displayed.')
    main_parser.add_argument('--show_fullpath', action='store_true', required=False, default=False,
                             help='Show full paths instead of relative paths in the simulation.')
    main_parser.add_argument('-ra', '--range', type=str, metavar='1:10-255', required=False, default=False,
                             help='Range mode: match only the files with filenames containing numbers in the specified range. The format is: (regex-match-group-id):(range-start)-(range-end). regex-match-group-id is the id of the regular expression that will contain the numbers that must be compared to the range. range-end is inclusive.')
    main_parser.add_argument('-re', '--regex_exists', metavar=r'"newsub/\1"', type=str, required=False, default=None,
                             help='Regex of output path to check if the matched regex here is matched prior writing output files.', **widget_text)
    main_parser.add_argument('--report', type=str, required=False, default='pathmatcher_report.txt', metavar='pathmatcher_report.txt',
                             help='Where to store the simulation report (default: pwd = current working dir).', **widget_filesave)
    main_parser.add_argument('--noreport', action='store_true', required=False, default=False,
                             help='Do not create a report file, print the report in the console.')
    main_parser.add_argument('--tree', action='store_true', required=False, default=False,
                             help='Regroup in a tree structure the matched files according to named and unnamed regex groups, and save the result as a json file (pathmatcher_tree.json).')
    main_parser.add_argument('-l', '--log', metavar='/some/folder/filename.log', type=str, required=False,
                             help='Path to the log file. (Output will be piped to both the stdout and the log file)', **widget_filesave)
    main_parser.add_argument('-v', '--verbose', action='store_true', required=False, default=False,
                             help='Verbose mode (show more output).')
    main_parser.add_argument('--silent', action='store_true', required=False, default=False,
                             help='No console output (but if --log specified, the log will still be saved in the specified file).')

    #== Parsing the arguments
    args = main_parser.parse_args(argv)  # Storing all arguments to args

    #-- Set variables from arguments
    inputpath = args.input
    outputpath = args.output if args.output else None
    regex_input = args.regex_input
    regex_output = args.regex_output
    regex_exists = args.regex_exists
    copy_mode = args.copy
    symlink_mode = args.symlink
    move_mode = args.move
    movefast_mode = args.move_fast
    delete_mode = args.delete
    test_flag = args.test
    dir_flag = args.dir
    yes_flag = args.yes
    force = args.force
    only_missing = not force
    show_fullpath = args.show_fullpath
    path_range = args.range
    reportpath = args.report
    noreport = args.noreport
    tree_flag = args.tree
    verbose = args.verbose
    silent = args.silent

    # -- Sanity checks

    # First check if there is any input path, it's always needed
    if inputpath is None:
        raise NameError('No input path specified! Please specify one!')

    # Try to decode in unicode, else we will get issues down the way when outputting files
    try:
        inputpath = str(inputpath)
    except UnicodeDecodeError:
        inputpath = str(inputpath, encoding=chardet.detect(inputpath)['encoding'])
    if outputpath:
        try:
            outputpath = str(outputpath)
        except UnicodeDecodeError:
            outputpath = str(outputpath, encoding=chardet.detect(outputpath)['encoding'])

    # Remove trailing spaces
    inputpath = inputpath.strip()
    if outputpath:
        outputpath = outputpath.strip()

    # Input or output path is a URL (eg: file:///media/... on Ubuntu/Debian), then strip that out
    RE_urlprotocol = re.compile(r'^\w{2,}:[/\\]{2,}', re.I)
    if RE_urlprotocol.match(inputpath):
        # First decode url encoded characters such as spaces %20.
        # urllib.parse.unquote already returns text, so no .decode() here.
        inputpath = urllib.parse.unquote(inputpath)
        # Need to prepend the first '/' since it is probably an absolute path and here we will strip the whole protocol
        inputpath = r'/' + RE_urlprotocol.sub(r'', inputpath)
    if outputpath and RE_urlprotocol.match(outputpath):
        outputpath = urllib.parse.unquote(outputpath)
        outputpath = r'/' + RE_urlprotocol.sub(r'', outputpath)

    # Check if input/output paths exist, else might be a relative path, then convert to an absolute path
    rootfolderpath = inputpath if os.path.exists(inputpath) else fullpath(inputpath)
    rootoutpath = outputpath if outputpath is None or os.path.exists(outputpath) else fullpath(outputpath)

    # Single file specified instead of a folder: we define the input folder as the top parent of this file
    if os.path.isfile(inputpath):
        # Use the parent directory as root (for correct relative path generation, else it will also truncate the filename!)
        rootfolderpath = os.path.dirname(inputpath)
    if outputpath and os.path.isfile(outputpath):
        # Same for the output: use the parent directory of the given file as root
        rootoutpath = os.path.dirname(outputpath)

    # Strip trailing slashes to ensure we correctly format paths afterward
    if rootfolderpath:
        rootfolderpath = rootfolderpath.rstrip('/\\')
    if rootoutpath:
        rootoutpath = rootoutpath.rstrip('/\\')

    # Final check of whether thepath exist
    if not os.path.isdir(rootfolderpath):
        raise NameError('Specified input path: %s (detected as %s) does not exist. Please check the specified path.' % (inputpath, rootfolderpath))

    # Check the modes are not conflicting
    active_modes = sum(1 for mode in (copy_mode, symlink_mode, move_mode, movefast_mode, delete_mode) if mode == True)
    if active_modes > 1:
        raise ValueError('Cannot set multiple modes simultaneously, please choose only one!')

    # Check if an output is needed and is not set
    if (copy_mode or symlink_mode or move_mode or movefast_mode) and not outputpath:
        raise ValueError('--copy or --symlink or --move or --move_fast specified but no --output !')

    # If tree mode enabled, enable also the regroup option
    if tree_flag:
        regroup = True

    # -- Configure the log file if enabled (ptee.write() will write to both stdout/console and to the log file)
    if args.log:
        ptee = Tee(args.log, 'a', nostdout=silent)
        #sys.stdout = Tee(args.log, 'a')
        sys.stderr = Tee(args.log, 'a', nostdout=silent)
    else:
        ptee = Tee(nostdout=silent)

    # -- Preprocess regular expression to add aliases
    # Directory alias (raw strings: '\d' is not a valid string escape)
    regex_input = regex_input.replace(r'\dirnodot', r'[^\\/.]*?').replace(r'\dir', r'[^\\/]*?')
    regex_output = regex_output.replace(r'\dirnodot', r'[^\\/.]*?').replace(r'\dir', r'[^\\/]*?') if regex_output else regex_output
    regex_exists = regex_exists.replace(r'\dirnodot', r'[^\\/.]*?').replace(r'\dir', r'[^\\/]*?') if regex_exists else regex_exists

    #### Main program
    # Test if regular expressions are correct syntactically
    try:
        regin = re.compile(str_to_raw(regex_input))
        # The output/exists patterns are only compiled to validate their syntax
        if regex_output:
            re.compile(str_to_raw(regex_output))
        if regex_exists:
            re.compile(str_to_raw(regex_exists))
        if path_range:  # parse the range format
            range_match = re.search(r'(\d+):(\d+)-(\d+)', path_range)
            if range_match is None:
                # Fail with an explicit message instead of an AttributeError on None
                raise ValueError('Invalid --range value: %r (expected format: group:start-end, eg, 1:10-255)' % path_range)
            prange = {"group": int(range_match.group(1)), "start": int(range_match.group(2)), "end": int(range_match.group(3))}
    except re.error:
        ptee.write("Regular expression is not correct, please fix it! Here is the error stack:\n")
        ptee.write(traceback.format_exc())
        return 1

    ptee.write("== Regex Path Matcher started ==\n")
    ptee.write("Parameters:")
    ptee.write("- Input root: %s" % inputpath)
    ptee.write("- Input regex: %s" % regex_input)
    ptee.write("- Output root: %s" % outputpath)
    ptee.write("- Output regex: %s" % regex_output)
    ptee.write("- Full arguments: %s" % ' '.join(sys.argv))
    ptee.write("\n")

    # == FILES WALKING AND MATCHING/SUBSTITUTION STEP
    files_list = []  # "to copy" files list, stores the list of input files and their corresponding output path (computed using regex)
    files_list_regroup = {}  # files list regrouped, if regroup = True
    ptee.write("Computing paths matching and simulation report, please wait (total time depends on files count - filesize has no influence). Press CTRL+C to abort\n")
    for dirpath, filename in tqdm(recwalk(inputpath, topdown=False, folders=dir_flag), unit='files', leave=True, smoothing=0):
        # Get full absolute filepath and relative filepath from base dir
        filepath = os.path.join(dirpath, filename)
        # File relative path from the root (we truncate the rootfolderpath so that we can easily check the files later even if the absolute path is different)
        relfilepath = path2unix(os.path.relpath(filepath, rootfolderpath))
        regin_match = regin.search(relfilepath)
        # Check if relative filepath matches the input regex
        if regin_match:  # Matched! We store it in the "to copy" files list
            # If range mode enabled, check if the numbers in the filepath are in the specified range, else we skip this file
            if path_range:
                curval = int(regin_match.group(prange['group']))
                if not (prange['start'] <= curval <= prange['end']):
                    continue
            # Compute the output filepath using output regex
            if outputpath:
                newfilepath = regin.sub(regex_output, relfilepath) if regex_output else relfilepath
            else:
                newfilepath = None
            # Check if output path exists (if argument is enabled)
            if regex_exists and newfilepath:
                if not os.path.exists(os.path.join(rootoutpath, regin.sub(regex_exists, relfilepath))):
                    # If not found, skip to the next file
                    if verbose or test_flag:
                        ptee.write("\rFile skipped because output does not exist: %s" % newfilepath)
                    continue
            # Store both paths into the "to copy" list
            files_list.append([relfilepath, newfilepath])
            if verbose or test_flag:  # Regex test mode or verbose: print the match
                ptee.write("\rMatch: %s %s %s\n" % (relfilepath, "-->" if newfilepath else "", newfilepath if newfilepath else ""))
                if test_flag:  # Regex test mode: break file walking after the first match
                    break
            # Store paths in a tree structure based on groups if regroup is enabled
            if regroup and regin_match.groups():
                curlevel = files_list_regroup  # current level in the tree
                parentlevel = curlevel  # parent level in the tree (necessary to modify the leaf, else there is no way to reference by pointer)
                lastg = 0  # last group key (to access the leaf)
                gdict = regin_match.groupdict()  # store the named groups, so we can pop as we consume it
                for g in regin_match.groups():
                    # For each group
                    if g is None:
                        # If group value is empty, just skip (ie, this is an optional group, this allow to specify multiple optional groups and build the tree accordingly)
                        continue
                    # Find if the current group value is in a named group, in this case we will also use the key name of the group followed by the value, and remove from dict (so that if there are multiple matching named groups with same value we don't lose them)
                    k, v, gdict = pop_first_namedgroup(gdict, g)
                    # If a named group is found, use the key followed by value as nodes
                    if k:
                        if not k in curlevel:
                            # Create node for group key/name
                            curlevel[k] = {}
                        if not g in curlevel[k]:
                            # Create subnode for group value
                            curlevel[k][g] = {}
                        # Memorize the parent level
                        parentlevel = curlevel[k]
                        lastg = g
                        # Memorize current level (step down one level for next iteration)
                        curlevel = curlevel[k][g]
                    # Else it is an unnamed group, use the value as the node name
                    else:
                        if not g in curlevel:
                            # Create node for group value
                            curlevel[g] = {}
                        # Memorize the parent level
                        parentlevel = curlevel
                        lastg = g
                        # Memorize current level (step down one level for next iteration)
                        curlevel = curlevel[g]
                # End of tree structure construction
                # Create the leaf if not done already, as a list
                if not parentlevel[lastg]:
                    parentlevel[lastg] = []
                # Append the value (so if there are multiple files matching the same structure, they will be appended in this list)
                parentlevel[lastg].append([relfilepath, newfilepath])
    ptee.write("End of simulation. %i files matched." % len(files_list))

    # Regex test mode: just quit after the first match
    if test_flag:
        if return_report:
            return files_list, None
        else:
            return 0

    # == SIMULATION REPORT STEP
    ptee.write("Preparing simulation report, please wait a few seconds...")

    # Initialize conflicts global flags
    conflict1_flag = False
    conflict2_flag = False

    # Show result in console using a Python implementation of MORE (because file list can be quite long)
    #more_display=More(num_lines=30)
    #"\n".join(map(str,files_list)) | more_display

    # Precompute conflict type 2 lookup table (= dict where each key is a output filepath, and the value the number of occurrences)
    outdict = {}
    for file_op in files_list:
        outdict[file_op[1]] = outdict.get(file_op[1], 0) + 1

    # Build and show simulation report in user's default text editor
    if noreport:
        reportfile = StringIO()
    else:
        reportfile = open(reportpath, 'w')
    try:
        reportfile.write("== REGEX PATH MATCHER SIMULATION REPORT ==\n")
        reportfile.write("Total number of files matched: %i\n" % len(files_list))
        reportfile.write("Parameters:\n")
        # Paths are text already: formatting encoded bytes would print b'...'
        reportfile.write("- Input root: %s\n" % inputpath)
        reportfile.write("- Input regex: %s\n" % regex_input)
        reportfile.write("- Output root: %s\n" % (outputpath if outputpath else ''))
        reportfile.write("- Output regex: %s\n" % regex_output)
        reportfile.write("- Full arguments: %s" % ' '.join(sys.argv))
        reportfile.write("\r\n")
        reportfile.write("List of matched files:\n")
        for file_op in files_list:
            conflict1 = False
            conflict2 = False
            if outputpath:
                # Check if there was a conflict:
                # Type 1 - already existing output file (force overwrite?)
                fulloutpath = os.path.join(rootoutpath, file_op[1])
                if os.path.exists(fulloutpath):
                    conflict1 = True
                    conflict1_flag = True
                # Type 2 - two files will output with same name (bad regex)
                if outdict[file_op[1]] > 1:
                    conflict2 = True
                    conflict2_flag = True

            # Show relative or absolute paths?
            if show_fullpath:
                showinpath = os.path.join(rootfolderpath, file_op[0])
                showoutpath = os.path.join(rootoutpath, file_op[1]) if outputpath else None
            else:
                showinpath = file_op[0]
                showoutpath = file_op[1] if outputpath else None

            # Write into report file
            reportfile.write("* %s %s %s %s %s" % (showinpath, "-->" if (outputpath or delete_mode) else "", showoutpath if outputpath else "", "[ALREADY_EXIST]" if conflict1 else '', "[CONFLICT]" if conflict2 else ''))
            reportfile.write("\n")
        if noreport:
            reportfile.seek(0)
            print(reportfile.read())
    finally:
        try:
            reportfile.close()
        except ValueError:
            pass

    # Open the simulation report with the system's default text editor
    if not (yes_flag or return_report or noreport):  # if --yes is supplied, just skip question and apply!
        ptee.write("Opening simulation report with your default editor, a new window should open.")
        open_with_default_app(reportpath)

    # == COPY/MOVE STEP
    if files_list and (delete_mode or ((copy_mode or symlink_mode or move_mode or movefast_mode) and outputpath)):
        # -- USER NOTIFICATION AND VALIDATION
        # Notify user of conflicts
        ptee.write("\n")
        if conflict1_flag:
            ptee.write("Warning: conflict type 1 (files already exist) has been detected. Please use --force if you want to overwrite them, else they will be skipped.\n")
        if conflict2_flag:
            ptee.write("Warning: conflict type 2 (collision) has been detected. If you continue, several files will have the same name due to the specified output regex (thus, some will be lost). You should cancel and check your regular expression for output.\n")
        if not conflict1_flag and not conflict2_flag:
            ptee.write("No conflict detected. You are good to go!")

        # Ask user if we should apply
        if not (yes_flag or return_report):  # if --yes is supplied, just skip question and apply!
            applycopy = input("Do you want to apply the result of the path reorganization simulation on %i files? [Y/N]: " % len(files_list))
            if applycopy.lower() != 'y':
                return 0

        # -- APPLY STEP
        ptee.write("Applying new path structure, please wait (total time depends on file sizes and matches count). Press CTRL+C to abort")
        for infilepath, outfilepath in tqdm(files_list, total=len(files_list), unit='files', leave=True):
            if verbose:
                ptee.write("%s --> %s" % (infilepath, outfilepath))
            # Copy the file! (User previously accepted to apply the simulation)
            fullinpath = os.path.join(rootfolderpath, infilepath)
            if outputpath:
                fulloutpath = os.path.join(rootoutpath, outfilepath)
                if movefast_mode:  # movefast: just move the file/directory tree
                    move_any(fullinpath, fulloutpath)
                else:  # else we first copy in any case, then delete old file if move_mode
                    copy_any(fullinpath, fulloutpath, only_missing=only_missing, symlink=bool(symlink_mode))  # copy file
                    if move_mode:
                        # Copy/delete is safer than move because we can ensure that the file is fully copied (metadata/stats included) before deleting the old
                        remove_if_exist(fullinpath)
            if delete_mode:  # if delete mode, ensure that the original file is deleted!
                remove_if_exist(fullinpath)

    # == RETURN AND END OF MAIN
    ptee.write("Task done, quitting.")
    # Save the tree structure in a json file if --tree is enabled
    if tree_flag:
        # json.dumps returns text, so the file must be opened in text mode
        with open('pathmatcher_tree.json', 'w') as jsonout:
            jsonout.write(json.dumps(files_list_regroup, sort_keys=True, indent=4, separators=(',', ': ')))
        print('Tree structure saved in file pathmatcher_tree.json')
    # Script mode: return the matched files and their substitutions if available
    if return_report:
        if regroup:
            return files_list_regroup, [conflict1_flag, conflict2_flag]
        else:
            return files_list, [conflict1_flag, conflict2_flag]
    # Standalone mode: just return non error code
    else:
        return 0
def readByteIO():
    """Print the contents of a small in-memory bytes buffer.

    The buffer holds six fixed bytes; they are read in one call and printed,
    then the stream position is moved to the end of the buffer.

    Returns:
        None.
    """
    f = BytesIO(b'\xe4\xb8\xad\xe6\x96\x87')
    print(f.read())
    # Seek on the buffer instance. Calling ``StringIO.seek(0, 2)`` on the
    # class itself passes 0 as ``self`` and raises TypeError.
    f.seek(0, 2)
def save_load_deck(model, xref='standard', punch=True, run_remove_unused=True,
                   run_convert=True, run_renumber=True, run_mirror=True,
                   run_save_load=True, run_quality=True, write_saves=True,
                   run_save_load_hdf5=True, run_mass_properties=True, run_loads=True):
    """writes, re-reads, saves an obj, loads an obj, and returns the deck

    The model is written to an in-memory deck, read back into a second
    model, and that second model is then pushed through the optional
    round-trip steps selected by the ``run_*`` flags.

    Parameters
    ----------
    model : the model to exercise; ``model.log`` is reused for every model
        created here
    xref : forwarded to ``_cross_reference(model2, xref)`` and
        ``cross_reference(model3, xref)``
    punch : forwarded to ``model2.read_bdf``
    run_remove_unused : call ``remove_unused(model)``
    run_convert : call ``convert(model, units_to, units)``
    run_renumber : call ``renumber('model2.bdf', model.log)``
    run_mirror : call ``bdf_mirror`` on 'model2.bdf' (only reached when
        ``run_renumber`` is also true)
    run_save_load : save ``model2`` to 'model.obj' and load it into a new model
    run_quality : call ``element_quality(model)`` when the model has elements
    write_saves : together with ``model.save_file_structure``, exercise
        ``model.write_bdfs``
    run_save_load_hdf5 : together with ``IS_H5PY``, export to / load from 'test.h5'
    run_mass_properties, run_loads : forwarded to ``_run_mass_properties`` /
        ``_run_loads``

    Returns
    -------
    model3 : the model loaded from 'model.obj' when ``run_save_load`` is true,
        otherwise ``model2`` after ``uncross_reference()``

    Notes
    -----
    Files 'junk.bdf', 'model.obj' and 'model2.bdf' are created in the current
    directory and removed again; 'test.h5' is written but not removed here.
    """
    model.validate()
    model.pop_parse_errors()
    model.pop_xref_errors()

    # Write the deck three times (size 8, size 16, size 16 double) into the
    # same buffer, rewinding before each write. The buffer is not truncated
    # in between, so what is read back later is the last write plus whatever
    # tail of an earlier, longer write is left behind it.
    # NOTE(review): presumably the later formats are never shorter -- confirm.
    bdf_file = StringIO()
    model.write_bdf(bdf_file, size=8, close=False)
    bdf_file.seek(0)
    model.write_bdf(bdf_file, size=16, close=False)
    bdf_file.seek(0)
    model.write_bdf(bdf_file, size=16, is_double=True, close=False)
    bdf_file.seek(0)

    if write_saves and model.save_file_structure:
        # write through write_bdfs to a scratch file, then discard it
        bdf_filenames = {
            0: 'junk.bdf',
        }
        model.write_bdfs(bdf_filenames)
        os.remove('junk.bdf')

    if run_remove_unused:
        remove_unused(model)
    if run_convert:
        units_to = ['m', 'kg', 's']
        units = ['ft', 'lbm', 's']
        convert(model, units_to, units)

    # Re-read the in-memory deck into a fresh model; cross-referencing is
    # done as a separate step after the read.
    model2 = BDF(log=model.log)
    #print(bdf_file.getvalue())
    model2.read_bdf(bdf_file, punch=punch, xref=False)
    _cross_reference(model2, xref)

    model2.pop_parse_errors()
    model2.get_bdf_stats()

    # 'model2.bdf' is consumed by renumber/bdf_mirror below and removed at
    # the end of the function
    model2.write_bdf('model2.bdf')
    nelements = len(model2.elements) + len(model2.masses)
    nnodes = len(model2.nodes) + len(model2.spoints) + len(model2.epoints)

    _run_mass_properties(model2, nnodes, nelements, run_mass_properties=run_mass_properties)
    _run_loads(model2, nelements, run_loads=run_loads)

    if run_save_load:
        # round trip through the obj file
        model2.save(obj_filename='model.obj', unxref=True)
        model3 = BDF(debug=False, log=model.log, mode='msc')
        model3.load(obj_filename='model.obj')
        os.remove('model.obj')
    else:
        model2.uncross_reference()
        model3 = model2

    if run_save_load_hdf5 and IS_H5PY:
        # round trip through hdf5 and check that every card type counted in
        # model2 is also present in the reloaded model4
        model2.export_hdf5_filename('test.h5')
        model4 = BDF(log=model2.log)
        model4.load_hdf5_filename('test.h5')
        model4.validate()
        bdf_stream = StringIO()
        model4.write_bdf(bdf_stream, encoding=None, size=8,
                         is_double=False,
                         interspersed=False,
                         enddata=None, write_header=True, close=True)
        for key, unused_value in model2.card_count.items():
            if key == 'ENDDATA':
                continue
            if key not in model4.card_count:
                msg = 'key=%r was not loaded to hdf5\nexpected=%s\nactual=%s' % (
                    key, model2.card_count, model4.card_count)
                # a missing card is logged, not raised
                #raise RuntimeError(msg)
                model.log.error(msg)

    cross_reference(model3, xref)
    if run_renumber:
        renumber('model2.bdf', model.log)
        if run_mirror:
            # mirroring is nested under renumber to avoid modifying an
            # existing model and breaking tests
            #
            # shouldn't have any effect on model2.bdf
            bdf_mirror('model2.bdf', plane='xz', log=model.log)
    os.remove('model2.bdf')

    if model.elements and run_quality:
        element_quality(model)
    return model3
class Stdout:
    """Standard output wrapper dedicated to the Omega framework.

    On top of a plain stream it provides tag/pattern coloration and an
    optional "backlog" holding a decolorized copy of what was written.

    Back logging is disabled unless the 'backlog' argument is true at
    initialization. It can also be enabled afterwards by assigning an
    empty string to the instance's `backlog` attribute.

    NOTE: See module's help for more informations.

    """

    def __init__(self, outfile=sys.__stdout__, backlog=False):
        # the stream we were built around; __del__ puts it back in sys.stdout
        self._orig_outfile = outfile

        # effective write target, kept apart from the original so that it
        # may be wrapped at runtime
        self.outfile = self._orig_outfile

        # the buffer always exists; the flag tells whether it gets fed
        self._backlog = StringIO()
        self._has_backlog = bool(backlog)

        # does the terminal support colors ?
        self._has_colors = ui.output.colors()

        self._write_lock = False

    def __del__(self):
        """Put the original stream back in sys.stdout when the wrapper dies"""
        self._backlog.close()
        # dirty hack when used before argparse on main file...
        sys.stdout = self._orig_outfile

    def __getattr__(self, obj):
        """Anything not defined here is looked up on the original stdout"""
        return getattr(self._orig_outfile, obj)

    def _write_line(self, line):
        """Transform a single line (newline, tags, colors) and write it"""
        # Replace a trailing '\r\n' or '\n' by the platform line separator
        for ending in ('\r\n', '\n'):
            if line.endswith(ending):
                line = line[:-len(ending)] + os.linesep
                break

        # special case: debug lines are dropped unless VERBOSITY is set.
        # NOTE: print() issues a second write() for the line separator, so
        # self._write_lock is raised to swallow that separator too when the
        # message itself was dropped.
        from core import session
        if line.startswith("[#] ") and not session.Conf.VERBOSITY():
            self._write_lock = True
            return
        if self._write_lock:
            self._write_lock = False
            if line == os.linesep:
                return

        # colorize tagged lines
        line = self.process_tags(line)

        # the backlog only ever receives decolorized text; the real stream
        # too when colors are not supported
        if self._has_backlog:
            self._backlog.write(decolorize(line))
        if not self._has_colors:
            line = decolorize(line)

        try:
            self.outfile.write(line)
        except UnicodeEncodeError:
            raw = encoding.encode(line)
            self.outfile.buffer.write(raw)

    def write(self, string):
        """Write the given string to stdout, one line at a time"""
        for chunk in string.splitlines(True):
            self._write_line(chunk)

    @property
    def backlog(self):
        """Content of the back logging buffer (AttributeError if disabled)"""
        if not self._has_backlog:
            raise AttributeError()
        self._backlog.seek(0)
        return self._backlog.read()

    @backlog.setter
    def backlog(self, value):
        """None or False disables the backlog; any other value empties
        and enables it. A string value becomes its initial (decolorized)
        content.

        """
        del self.backlog
        if value is False or value is None:
            return
        self._has_backlog = True
        if value.__class__ == str:
            self._backlog.write(decolorize(value))

    @backlog.deleter
    def backlog(self):
        """Empty the backlog buffer and mark back logging as disabled"""
        self._backlog.truncate(0)
        self._backlog.seek(0)
        self._has_backlog = False

    @staticmethod
    def process_tags(line):
        """Apply tag transformations (colorization and pattern rules)
        to a line. Lines that do not start with a known tag are
        returned untouched.

        >>> process_tags("[*] FOO: «bar»\\n")
        '\\x1b[1m\\x1b[34m[*]\\x1b[0m FOO: \\x1b[37m«bar»\\x1b[0m\\n'

        """
        tag_list = [
            ('%BoldBlue', '[*] '),    # INFO
            ('%BoldRed', '[-] '),     # ERROR
            ('%BoldGreen', '[+] '),   # SUCCESS
            ('%BoldPink', '[?] '),    # QUESTION
            ('%BoldYellow', '[!] '),  # WARNING
            ('%BoldBlack', '[#] '),   # DEBUG
        ]

        # pick the first tag the line starts with; untagged lines go back
        # unchanged
        for tag in tag_list:
            if line.startswith(tag[1]):
                break
        else:
            return line

        style, prefix = tag
        width = len(prefix)

        # collapse repeated tags >>> "[-] [-] Foo" -> "[-] Foo"
        while line.startswith(prefix, width):
            line = line[width:]

        # swap the raw tag for its colored form
        line = colorize(style, prefix) + line[width:]

        # colorize «*» and `*` patterns of the tagged line
        line = re.sub(
            '«(.+?)»',
            lambda found: colorize('%White', repr(found.group(1))),
            line,
        )
        line = re.sub(
            '`(.+?)`',
            lambda found: '`' + colorize('%DimWhiteBold', found.group(1)) + '`',
            line,
        )

        return line
class seek_wrapper:
    """Adds a seek method to a file object.

    This is only designed for seeking on readonly file-like objects.

    Wrapped file-like object must have a read method.  The readline method is
    only supported if that method is present on the wrapped object.  The
    readlines method is always supported.  xreadlines and iteration are
    supported only for Python 2.2 and above.

    Public attributes:

    wrapped: the wrapped file object
    is_closed: true iff .close() has been called

    WARNING: All other attributes of the wrapped object (ie. those that are not
    one of wrapped, read, readline, readlines, xreadlines, __iter__ and next)
    are passed through unaltered, which may or may not make sense for your
    particular file object.

    """
    # General strategy is to check that cache is full enough, then delegate to
    # the cache (self.__cache, which is a StringIO instance).  A seek
    # position (self.__pos) is maintained independently of the cache, in order
    # that a single cache may be shared between multiple seek_wrapper objects.
    # Copying using module copy shares the cache in this way.

    def __init__(self, wrapped):
        """Wrap `wrapped`, starting with an empty cache at position 0."""
        self.wrapped = wrapped
        # One-element lists so that copies made by __copy__ share the state.
        self.__read_complete_state = [False]
        self.__is_closed_state = [False]
        self.__have_readline = hasattr(self.wrapped, "readline")
        self.__cache = StringIO()
        self.__pos = 0  # seek position

    def invariant(self):
        """Return True when the cache holds everything read from `wrapped`."""
        # The end of the cache is always at the same place as the end of the
        # wrapped file (though the .tell() method is not required to be present
        # on wrapped file).
        return self.wrapped.tell() == len(self.__cache.getvalue())

    def close(self):
        """Close the wrapped object and mark this wrapper as closed."""
        self.wrapped.close()
        self.is_closed = True

    def __getattr__(self, name):
        """Serve the shared-state attributes, else delegate to `wrapped`."""
        if name == "is_closed":
            return self.__is_closed_state[0]
        elif name == "read_complete":
            return self.__read_complete_state[0]

        wrapped = self.__dict__.get("wrapped")
        if wrapped:
            return getattr(wrapped, name)

        return getattr(self.__class__, name)

    def __setattr__(self, name, value):
        """Route is_closed / read_complete into the shared state lists."""
        if name == "is_closed":
            self.__is_closed_state[0] = bool(value)
        elif name == "read_complete":
            # once closed, the read_complete flag is frozen
            if not self.is_closed:
                self.__read_complete_state[0] = bool(value)
        else:
            self.__dict__[name] = value

    def seek(self, offset, whence=0):
        """Move the seek position, reading from `wrapped` as needed.

        whence=0: `offset` is absolute and must be >= 0.
        whence=1: `offset` is relative to the current position; the
            resulting position must be >= 0.
        whence=2: `offset` is a non-negative distance counted back from
            the end of the wrapped file (which is read completely).

        Raises ValueError for an offset that is invalid for `whence`.
        """
        assert whence in [0, 1, 2]

        # how much data, if any, do we need to read?
        if whence == 2:  # 2: relative to end of *wrapped* file
            if offset < 0:
                raise ValueError("negative seek offset")
            # since we don't know yet where the end of that file is, we must
            # read everything
            to_read = None
        else:
            if whence == 0:  # 0: absolute
                if offset < 0:
                    raise ValueError("negative seek offset")
                dest = offset
            else:  # 1: relative to current position
                pos = self.__pos
                # Reject only destinations before the start of the file.
                # (Comparing `pos < offset` refused valid forward seeks and
                # let negative destinations through.)
                if pos + offset < 0:
                    raise ValueError("seek to before start of file")
                dest = pos + offset
            end = len_of_seekable(self.__cache)
            to_read = dest - end
            if to_read < 0:
                to_read = 0

        if to_read != 0:
            self.__cache.seek(0, 2)
            if to_read is None:
                assert whence == 2
                self.__cache.write(self.wrapped.read())
                self.read_complete = True
                self.__pos = self.__cache.tell() - offset
            else:
                data = self.wrapped.read(to_read)
                if not data:
                    self.read_complete = True
                else:
                    self.__cache.write(data)
                # Don't raise an exception even if we've seek()ed past the end
                # of .wrapped, since fseek() doesn't complain in that case.
                # Also like fseek(), pretend we have seek()ed past the end,
                # i.e. not:
                #self.__pos = self.__cache.tell()
                # but rather:
                self.__pos = dest
        else:
            self.__pos = dest

    def tell(self):
        """Return the current seek position."""
        return self.__pos

    def __copy__(self):
        """Return a wrapper sharing this one's cache and state flags.

        The copy starts at position 0.
        """
        cpy = self.__class__(self.wrapped)
        cpy.__cache = self.__cache
        cpy.__read_complete_state = self.__read_complete_state
        cpy.__is_closed_state = self.__is_closed_state
        return cpy

    def get_data(self):
        """Return the whole content without moving the seek position."""
        pos = self.__pos
        try:
            self.seek(0)
            return self.read(-1)
        finally:
            self.__pos = pos

    def read(self, size=-1):
        """Read up to `size` characters (everything when size is -1)."""
        pos = self.__pos
        end = len_of_seekable(self.__cache)
        available = end - pos

        # enough data already cached?
        if size <= available and size != -1:
            self.__cache.seek(pos)
            self.__pos = pos + size
            return self.__cache.read(size)

        # no, so read sufficient data from wrapped file and cache it
        self.__cache.seek(0, 2)
        if size == -1:
            self.__cache.write(self.wrapped.read())
            self.read_complete = True
        else:
            to_read = size - available
            assert to_read > 0
            data = self.wrapped.read(to_read)
            if not data:
                self.read_complete = True
            else:
                self.__cache.write(data)
        self.__cache.seek(pos)

        data = self.__cache.read(size)
        self.__pos = self.__cache.tell()
        assert self.__pos == pos + len(data)
        return data

    def readline(self, size=-1):
        """Read one line, truncated to `size` characters unless size is -1.

        Raises NotImplementedError if the wrapped object has no readline.
        """
        if not self.__have_readline:
            raise NotImplementedError("no readline method on wrapped object")

        # line we're about to read might not be complete in the cache, so
        # read another line first
        pos = self.__pos
        self.__cache.seek(0, 2)
        data = self.wrapped.readline()
        if not data:
            self.read_complete = True
        else:
            self.__cache.write(data)
        self.__cache.seek(pos)

        data = self.__cache.readline()
        if size != -1:
            r = data[:size]
        else:
            r = data
        # Advance by what is actually returned: a line shorter than `size`
        # must not push the position past the data handed to the caller.
        self.__pos = pos + len(r)
        return r

    def readlines(self, sizehint=-1):
        """Read the rest of the wrapped file and return the remaining lines."""
        pos = self.__pos
        self.__cache.seek(0, 2)
        self.__cache.write(self.wrapped.read())
        self.read_complete = True
        self.__cache.seek(pos)
        data = self.__cache.readlines(sizehint)
        self.__pos = self.__cache.tell()
        return data

    def __iter__(self):
        return self

    def __next__(self):
        line = self.readline()
        if line == "":
            raise StopIteration
        return line

    xreadlines = __iter__

    def __repr__(self):
        return ("<%s at %s whose wrapped object = %r>" %
                (self.__class__.__name__, hex(abs(id(self))), self.wrapped))
class EmbeddedSphinxShell(object):
    """An embedded IPython instance to run inside Sphinx

    Besides what `__init__` sets, several methods read attributes that are
    not initialised in this class and therefore have to be assigned on the
    instance beforehand: `is_okexcept`, `is_okwarning`, `hold_count`,
    `promptin`, `promptout`, `savefig_dir` and `source_dir`.
    """

    def __init__(self, exec_lines=None):
        """Create the shell and run `exec_lines` to prepopulate the namespace."""

        # everything printed while running input lines is captured here
        self.cout = StringIO()

        if exec_lines is None:
            exec_lines = []

        # Create config object for IPython
        config = Config()
        config.HistoryManager.hist_file = ':memory:'
        config.InteractiveShell.autocall = False
        config.InteractiveShell.autoindent = False
        config.InteractiveShell.colors = 'NoColor'

        # create a profile so instance history isn't saved
        tmp_profile_dir = tempfile.mkdtemp(prefix='profile_')
        profname = 'auto_profile_sphinx_build'
        pdir = os.path.join(tmp_profile_dir, profname)
        profile = ProfileDir.create_profile_dir(pdir)

        # Create and initialize global ipython, but don't start its mainloop.
        # This will persist across different EmbeddedSphinxShell instances.
        IP = InteractiveShell.instance(config=config, profile_dir=profile)
        atexit.register(self.cleanup)

        # Store a few parts of IPython we'll need.
        self.IP = IP
        self.user_ns = self.IP.user_ns
        self.user_global_ns = self.IP.user_global_ns

        # input lines buffered until they form a complete statement
        self.lines_waiting = []
        self.input = ''
        self.output = ''
        self.tmp_profile_dir = tmp_profile_dir

        self.is_verbatim = False
        self.is_doctest = False
        self.is_suppress = False

        # Optionally, provide more detailed information to shell.
        # this is assigned by the SetUp method of IPythonDirective
        # to point at itself.
        #
        # So, you can access handy things at self.directive.state
        self.directive = None

        # on the first call to the savefig decorator, we'll import
        # pyplot as plt so we can make a call to the plt.gcf().savefig
        self._pyplot_imported = False

        # Prepopulate the namespace.
        for line in exec_lines:
            self.process_input_line(line, store_history=False)

    def cleanup(self):
        """Remove the temporary profile directory (registered with atexit)."""
        shutil.rmtree(self.tmp_profile_dir, ignore_errors=True)

    def clear_cout(self):
        """Empty the capture buffer and rewind it to position 0."""
        self.cout.seek(0)
        self.cout.truncate(0)

    def process_input_line(self, line, store_history=True):
        """process the input, capturing stdout"""

        stdout = sys.stdout
        try:
            sys.stdout = self.cout
            self.lines_waiting.append(line)
            # run the buffered lines only once they form a complete block
            if self.IP.check_complete()[0] != 'incomplete':
                source_raw = ''.join(self.lines_waiting)
                self.lines_waiting = []
                self.IP.run_cell(source_raw, store_history=store_history)
        finally:
            sys.stdout = stdout

    def process_image(self, decorator):
        """
        Build an image directive from a savefig decorator.

        From an input like
            savefig somefile.png width=4in
        build a directive like
            .. image:: somefile.png
            :width 4in

        Returns the tuple (image file name, image directive text).
        """
        savefig_dir = self.savefig_dir
        source_dir = self.source_dir
        saveargs = decorator.split(' ')
        filename = saveargs[1]
        # insert relative path to image file in source (as absolute path for Sphinx)
        outfile = '/' + os.path.relpath(os.path.join(savefig_dir, filename),
                                        source_dir)

        imagerows = ['.. image:: %s' % outfile]

        # every remaining argument is a key=value directive option
        for kwarg in saveargs[2:]:
            arg, val = kwarg.split('=')
            arg = arg.strip()
            val = val.strip()
            imagerows.append(' :%s: %s' % (arg, val))

        image_file = os.path.basename(outfile)  # only return file name
        image_directive = '\n'.join(imagerows)
        return image_file, image_directive

    # Callbacks for each type of token
    def process_input(self, data, input_prompt, lineno):
        """
        Process data block for INPUT token.

        `data` is the tuple (decorator, input, rest). The input is run line
        by line through `process_input_line` (except in verbatim mode) and
        the captured output is collected.

        Returns the tuple (ret, input_lines, processed_output, is_doctest,
        decorator, image_file, image_directive), where `ret` is the list of
        formatted lines to emit.
        """
        decorator, input, rest = data
        image_file = None
        image_directive = None

        is_verbatim = decorator == '@verbatim' or self.is_verbatim
        is_doctest = (decorator is not None and
                      decorator.startswith('@doctest')) or self.is_doctest
        is_suppress = decorator == '@suppress' or self.is_suppress
        is_okexcept = decorator == '@okexcept' or self.is_okexcept
        is_okwarning = decorator == '@okwarning' or self.is_okwarning
        is_savefig = decorator is not None and \
            decorator.startswith('@savefig')

        input_lines = input.split('\n')
        if len(input_lines) > 1:
            if input_lines[-1] != "":
                # make sure there's a blank line so splitter buffer gets reset
                input_lines.append('')

        continuation = ' %s:' % ''.join(['.'] * (len(str(lineno)) + 2))

        if is_savefig:
            image_file, image_directive = self.process_image(decorator)

        ret = []
        is_semicolon = False

        # Hold the execution count, if requested to do so.
        if is_suppress and self.hold_count:
            store_history = False
        else:
            store_history = True

        # Note: catch_warnings is not thread safe
        with warnings.catch_warnings(record=True) as ws:
            for i, line in enumerate(input_lines):
                if line.endswith(';'):
                    is_semicolon = True

                if i == 0:
                    # process the first input line
                    if is_verbatim:
                        self.process_input_line('')
                        self.IP.execution_count += 1  # increment it anyway
                    else:
                        # only submit the line in non-verbatim mode
                        self.process_input_line(line, store_history=store_history)
                    formatted_line = '%s %s' % (input_prompt, line)
                else:
                    # process a continuation line
                    if not is_verbatim:
                        self.process_input_line(line, store_history=store_history)

                    formatted_line = '%s %s' % (continuation, line)

                if not is_suppress:
                    ret.append(formatted_line)

        if not is_suppress and len(rest.strip()) and is_verbatim:
            # The "rest" is the standard output of the input. This needs to be
            # added when in verbatim mode. If there is no "rest", then we don't
            # add it, as the new line will be added by the processed output.
            ret.append(rest)

        # Fetch the processed output. (This is not the submitted output.)
        self.cout.seek(0)
        processed_output = self.cout.read()
        if not is_suppress and not is_semicolon:
            #
            # In IPythonDirective.run, the elements of `ret` are eventually
            # combined such that '' entries correspond to newlines. So if
            # `processed_output` is equal to '', then the adding it to `ret`
            # ensures that there is a blank line between consecutive inputs
            # that have no outputs, as in:
            #
            #    In [1]: x = 4
            #
            #    In [2]: x = 5
            #
            # When there is processed output, it has a '\n' at the tail end. So
            # adding the output to `ret` will provide the necessary spacing
            # between consecutive input/output blocks, as in:
            #
            #   In [1]: x
            #   Out[1]: 5
            #
            #   In [2]: x
            #   Out[2]: 5
            #
            # When there is stdout from the input, it also has a '\n' at the
            # tail end, and so this ensures proper spacing as well. E.g.:
            #
            #   In [1]: print x
            #   5
            #
            #   In [2]: x = 5
            #
            # When in verbatim mode, `processed_output` is empty (because
            # nothing was passed to IP. Sometimes the submitted code block has
            # an Out[] portion and sometimes it does not. When it does not, we
            # need to ensure proper spacing, so we have to add '' to `ret`.
            # However, if there is an Out[] in the submitted code, then we do
            # not want to add a newline as `process_output` has stuff to add.
            # The difficulty is that `process_input` doesn't know if
            # `process_output` will be called---so it doesn't know if there is
            # Out[] in the code block. The requires that we include a hack in
            # `process_block`. See the comments there.
            #
            ret.append(processed_output)
        elif is_semicolon:
            # Make sure there is a newline after the semicolon.
            ret.append('')

        # context information
        filename = "Unknown"
        lineno = 0
        # `directive` stays None until a directive assigns itself, so guard
        # the attribute access like process_output and process_block do.
        if self.directive is not None and self.directive.state:
            filename = self.directive.state.document.current_source
            lineno = self.directive.state.document.current_line

        # output any exceptions raised during execution to stdout
        # unless :okexcept: has been specified.
        if not is_okexcept and "Traceback" in processed_output:
            s = "\nException in %s at block ending on line %s\n" % (filename, lineno)
            s += "Specify :okexcept: as an option in the ipython:: block to suppress this message\n"
            sys.stdout.write('\n\n>>>' + ('-' * 73))
            sys.stdout.write(s)
            sys.stdout.write(processed_output)
            sys.stdout.write('<<<' + ('-' * 73) + '\n\n')

        # output any warning raised during execution to stdout
        # unless :okwarning: has been specified.
        if not is_okwarning:
            for w in ws:
                s = "\nWarning in %s at block ending on line %s\n" % (filename, lineno)
                s += "Specify :okwarning: as an option in the ipython:: block to suppress this message\n"
                sys.stdout.write('\n\n>>>' + ('-' * 73))
                sys.stdout.write(s)
                sys.stdout.write(('-' * 76) + '\n')
                s = warnings.formatwarning(w.message, w.category,
                                           w.filename, w.lineno, w.line)
                sys.stdout.write(s)
                sys.stdout.write('<<<' + ('-' * 73) + '\n')

        # Reset the capture buffer. truncate(0) alone leaves the stream
        # position at the old end, so the next write to an io.StringIO would
        # be preceded by NUL padding; clear_cout() also rewinds.
        self.clear_cout()
        return (ret, input_lines, processed_output,
                is_doctest, decorator, image_file, image_directive)

    def process_output(self, data, output_prompt, input_lines, output,
                       is_doctest, decorator, image_file):
        """
        Process data block for OUTPUT token.

        Runs the doctest comparison when requested and returns the list of
        extra lines to emit (non-empty only in verbatim mode with submitted
        output).

        Raises RuntimeError when the processed output lacks the output
        prompt or a standard doctest comparison fails.
        """
        # Recall: `data` is the submitted output, and `output` is the processed
        # output from `input_lines`.

        TAB = ' ' * 4

        if is_doctest and output is not None:

            found = output  # This is the processed output
            found = found.strip()
            submitted = data.strip()

            if self.directive is None:
                source = 'Unavailable'
                content = 'Unavailable'
            else:
                source = self.directive.state.document.current_source
                content = self.directive.content
                # Add tabs and join into a single string.
                content = '\n'.join([TAB + line for line in content])

            # Make sure the output contains the output prompt.
            ind = found.find(output_prompt)
            if ind < 0:
                e = ('output does not contain output prompt\n\n'
                     'Document source: {0}\n\n'
                     'Raw content: \n{1}\n\n'
                     'Input line(s):\n{TAB}{2}\n\n'
                     'Output line(s):\n{TAB}{3}\n\n')
                e = e.format(source, content, '\n'.join(input_lines),
                             repr(found), TAB=TAB)
                raise RuntimeError(e)
            found = found[len(output_prompt):].strip()

            # Handle the actual doctest comparison.
            if decorator.strip() == '@doctest':
                # Standard doctest
                if found != submitted:
                    e = ('doctest failure\n\n'
                         'Document source: {0}\n\n'
                         'Raw content: \n{1}\n\n'
                         'On input line(s):\n{TAB}{2}\n\n'
                         'we found output:\n{TAB}{3}\n\n'
                         'instead of the expected:\n{TAB}{4}\n\n')
                    e = e.format(source, content, '\n'.join(input_lines),
                                 repr(found), repr(submitted), TAB=TAB)
                    raise RuntimeError(e)
            else:
                self.custom_doctest(decorator, input_lines, found, submitted)

        # When in verbatim mode, this holds additional submitted output
        # to be written in the final Sphinx output.
        # https://github.com/ipython/ipython/issues/5776
        out_data = []

        is_verbatim = decorator == '@verbatim' or self.is_verbatim
        if is_verbatim and data.strip():
            # Note that `ret` in `process_block` has '' as its last element if
            # the code block was in verbatim mode. So if there is no submitted
            # output, then we will have proper spacing only if we do not add
            # an additional '' to `out_data`. This is why we condition on
            # `and data.strip()`.

            # The submitted output has no output prompt. If we want the
            # prompt and the code to appear, we need to join them now
            # instead of adding them separately---as this would create an
            # undesired newline. How we do this ultimately depends on the
            # format of the output regex. I'll do what works for the default
            # prompt for now, and we might have to adjust if it doesn't work
            # in other cases. Finally, the submitted output does not have
            # a trailing newline, so we must add it manually.
            out_data.append("{0} {1}\n".format(output_prompt, data))

        return out_data

    def process_comment(self, data):
        """Process data block for COMMENT token.

        Returns [data], or None when output is suppressed.
        """
        if not self.is_suppress:
            return [data]

    def save_image(self, image_file):
        """
        Saves the image file to disk.
        """
        self.ensure_pyplot()
        command = 'plt.gcf().savefig("%s")' % image_file
        #print 'SAVEFIG', command  # dbg
        self.process_input_line('bookmark ipy_thisdir', store_history=False)
        self.process_input_line('cd -b ipy_savedir', store_history=False)
        self.process_input_line(command, store_history=False)
        self.process_input_line('cd -b ipy_thisdir', store_history=False)
        self.process_input_line('bookmark -d ipy_thisdir', store_history=False)
        self.clear_cout()

    def process_block(self, block):
        """
        process block from the block_parser and return a list of processed lines

        `block` is an iterable of (token, data) pairs. Returns the tuple
        (ret, image_directive).

        Raises RuntimeError when an OUTPUT token appears before any INPUT.
        """
        ret = []
        output = None
        input_lines = None
        lineno = self.IP.execution_count

        input_prompt = self.promptin % lineno
        output_prompt = self.promptout % lineno
        image_file = None
        image_directive = None

        found_input = False
        for token, data in block:
            if token == COMMENT:
                out_data = self.process_comment(data)
            elif token == INPUT:
                found_input = True
                (out_data, input_lines, output, is_doctest,
                 decorator, image_file, image_directive) = \
                    self.process_input(data, input_prompt, lineno)
            elif token == OUTPUT:
                if not found_input:

                    TAB = ' ' * 4
                    linenumber = 0
                    source = 'Unavailable'
                    content = 'Unavailable'
                    if self.directive:
                        linenumber = self.directive.state.document.current_line
                        source = self.directive.state.document.current_source
                        content = self.directive.content
                        # Add tabs and join into a single string.
                        content = '\n'.join([TAB + line for line in content])

                    e = ('\n\nInvalid block: Block contains an output prompt '
                         'without an input prompt.\n\n'
                         'Document source: {0}\n\n'
                         'Content begins at line {1}: \n\n{2}\n\n'
                         'Problematic block within content: \n\n{TAB}{3}\n\n')
                    e = e.format(source, linenumber, content, block, TAB=TAB)

                    # Write, rather than include in exception, since Sphinx
                    # will truncate tracebacks.
                    sys.stdout.write(e)
                    raise RuntimeError('An invalid block was detected.')

                out_data = \
                    self.process_output(data, output_prompt, input_lines,
                                        output, is_doctest, decorator,
                                        image_file)
                if out_data:
                    # Then there was user submitted output in verbatim mode.
                    # We need to remove the last element of `ret` that was
                    # added in `process_input`, as it is '' and would introduce
                    # an undesirable newline.
                    assert (ret[-1] == '')
                    del ret[-1]

            if out_data:
                ret.extend(out_data)

        # save the image files
        if image_file is not None:
            self.save_image(image_file)

        return ret, image_directive

    def ensure_pyplot(self):
        """
        Ensures that pyplot has been imported into the embedded IPython shell.

        Also, makes sure to set the backend appropriately if not set already.

        """
        # We are here if the @figure pseudo decorator was used. Thus, it's
        # possible that we could be here even if python_mplbackend were set to
        # `None`. That's also strange and perhaps worthy of raising an
        # exception, but for now, we just set the backend to 'agg'.

        if not self._pyplot_imported:
            if 'matplotlib.backends' not in sys.modules:
                # Then ipython_matplotlib was set to None but there was a
                # call to the @figure decorator (and ipython_execlines did
                # not set a backend).
                #raise Exception("No backend was set, but @figure was used!")
                import matplotlib
                matplotlib.use('agg')

            # Always import pyplot into embedded shell.
            self.process_input_line('import matplotlib.pyplot as plt',
                                    store_history=False)
            self._pyplot_imported = True

    def process_pure_python(self, content):
        """
        content is a list of strings. it is unedited directive content

        This runs it line by line in the InteractiveShell, prepends
        prompts as needed capturing stderr and stdout, then returns
        the content as a list as if it were ipython code
        """
        output = []
        savefig = False  # keep up with this to clear figure
        multiline = False  # to handle line continuation
        multiline_start = None
        fmtin = self.promptin

        ct = 0

        for lineno, line in enumerate(content):

            line_stripped = line.strip()
            if not len(line):
                output.append(line)
                continue

            # handle decorators
            if line_stripped.startswith('@'):
                output.extend([line])
                if 'savefig' in line:
                    savefig = True  # and need to clear figure
                continue

            # handle comments
            if line_stripped.startswith('#'):
                output.extend([line])
                continue

            # deal with lines checking for multiline
            continuation = u' %s:' % ''.join(['.'] * (len(str(ct)) + 2))
            if not multiline:
                modified = u"%s %s" % (fmtin % ct, line_stripped)
                output.append(modified)
                ct += 1
                try:
                    ast.parse(line_stripped)
                    output.append(u'')
                except Exception:  # on a multiline
                    multiline = True
                    multiline_start = lineno
            else:  # still on a multiline
                modified = u'%s %s' % (continuation, line)
                output.append(modified)

                # if the next line is indented, it should be part of multiline
                if len(content) > lineno + 1:
                    nextline = content[lineno + 1]
                    if len(nextline) - len(nextline.lstrip()) > 3:
                        continue
                try:
                    mod = ast.parse(
                        '\n'.join(content[multiline_start:lineno + 1]))
                    if isinstance(mod.body[0], ast.FunctionDef):
                        # check to see if we have the whole function
                        for element in mod.body[0].body:
                            if isinstance(element, ast.Return):
                                multiline = False
                    else:
                        output.append(u'')
                        multiline = False
                except Exception:
                    # the accumulated lines do not parse yet: stay in
                    # multiline mode and try again with the next line
                    pass

            if savefig:  # clear figure if plotted
                self.ensure_pyplot()
                self.process_input_line('plt.clf()', store_history=False)
                self.clear_cout()
                savefig = False

        return output

    def custom_doctest(self, decorator, input_lines, found, submitted):
        """
        Perform a specialized doctest.

        The second word of `decorator` selects the handler in the `doctests`
        mapping; an unknown name raises Exception.
        """
        from .custom_doctests import doctests

        args = decorator.split()
        doctest_type = args[1]
        if doctest_type in doctests:
            doctests[doctest_type](self, args, input_lines, found, submitted)
        else:
            e = "Invalid option to @doctest: {0}".format(doctest_type)
            raise Exception(e)
def test_lazy_int():
    """A dumped bare integer is returned unchanged by LazyJSON.load()."""
    stream = StringIO()
    dump(42, stream)
    # rewind so that LazyJSON gets the stream from its beginning
    stream.seek(0)
    lazy = LazyJSON(stream)
    loaded = lazy.load()
    assert_equal(42, loaded)
from io import StringIO

# Write two words of equal length at offset 0 of the same buffer and print
# the buffer after each write: the second write replaces the first in place.
o = StringIO()
for text in ("foo", "boo"):
    o.seek(0)
    o.write(text)
    print(o.getvalue())
class RdpWrapperTests(TestCase):
    """Tests of the RDP classifier wrapper functions."""

    def setUp(self):
        """Prepare query sequences, training inputs and a scratch directory."""
        # Number of attempts test_assign_taxonomy makes before giving up.
        self.num_trials = 10

        self.test_input1 = rdp_test_fasta.split('\n')
        self.expected_assignments1 = rdp_expected_out

        # Files for training
        self.reference_file = StringIO(rdp_training_sequences)
        self.reference_file.seek(0)

        self.taxonomy_file = StringIO(rdp_training_taxonomy)
        self.taxonomy_file.seek(0)

        self.training_dir = tempfile.mkdtemp(prefix='RdpTrainer_output_')

        # Sequences for trained classifier
        self.test_trained_input = rdp_trained_fasta.split("\n")

    def tearDown(self):
        """Remove the scratch training directory created in setUp."""
        rmtree(self.training_dir)

    def test_parse_rdp_assignment(self):
        # The three-way unpacking itself checks that exactly three values are
        # returned; only the sequence id is compared below.
        seqid, _direction, _assignments = parse_rdp_assignment(
            "X67228\t\t"
            "Root\tnorank\t1.0\t"
            "Bacteria\tdomain\t1.0\t"
            "\"Proteobacteria\"\tphylum\t1.0\t"
            "Alphaproteobacteria\tclass\t0.9\t"
            "Rhizobiales\torder\t0.9\t"
            "Rhizobiaceae\tfamily\t0.47\t"
            "Rhizobium\tgenus\t0.46")
        self.assertEqual(seqid, "X67228")

    def test_assign_taxonomy_short_sequence(self):
        """assign_taxonomy should return Unassignable if sequence is too short
        """
        assignments = assign_taxonomy([
            '>MySeq 1',
            'TTCCGGTTGATCCTGCCGGACCCGACTGCTATCCGGA',
        ])
        self.assertEqual(assignments, {'MySeq 1': ('Unassignable', 1.0)})

    def test_assign_taxonomy(self):
        """assign_taxonomy wrapper functions as expected

        This test may fail periodically, but failure should be rare.
        """
        unverified_seq_ids = set(self.expected_assignments1.keys())
        for _ in range(self.num_trials):
            obs_assignments = assign_taxonomy(self.test_input1)
            # Iterate over a snapshot because verified ids are removed from
            # the set inside the loop.
            for seq_id in list(unverified_seq_ids):
                obs_lineage, _ = obs_assignments[seq_id]
                if obs_lineage == self.expected_assignments1[seq_id]:
                    unverified_seq_ids.remove(seq_id)
            if not unverified_seq_ids:
                break

        # Describe every id that never matched, using the last trial's output.
        messages = []
        for seq_id in unverified_seq_ids:
            messages.append("Unable to verify %s trials" % self.num_trials)
            messages.append(" Sequence ID: %s" % seq_id)
            messages.append(" Expected: %s" %
                            self.expected_assignments1[seq_id])
            messages.append(" Observed: %s" % obs_assignments[seq_id][0])
            messages.append(" Confidence: %s" % obs_assignments[seq_id][1])

        # make sure all taxonomic results were correct at least once
        self.assertFalse(unverified_seq_ids, msg='\n'.join(messages))

    def test_assign_taxonomy_alt_confidence(self):
        """assign_taxonomy wrapper functions as expected with alt confidence
        """
        obs_assignments = assign_taxonomy(self.test_input1,
                                          min_confidence=0.95)

        for seq_id, assignment in obs_assignments.items():
            obs_lineage, obs_confidence = assignment
            exp_lineage = self.expected_assignments1[seq_id]
            message = "Sequence ID: %s, assignment: %s" % (seq_id, assignment)
            # A truncated lineage is acceptable as long as it is a prefix of
            # the expected one.
            self.assertTrue(
                exp_lineage.startswith(obs_lineage) or
                (obs_lineage == "Unclassified"),
                msg=message,
            )
            self.assertTrue(obs_confidence >= 0.95, msg=message)

    def test_assign_taxonomy_file_output(self):
        """ assign_taxonomy wrapper writes correct file output when requested

        This function tests for successful completion of assign_taxonomy
        when writing to file, that the lines in the file roughly look
        correct by pairing them with the expected seq ids, and that each
        line starts with the correct seq id. Actual testing of taxonomy
        data is performed elsewhere.
        """
        output_fp = get_tmp_filename(
            prefix='RDPAssignTaxonomyTests', suffix='.txt')
        # convert the expected dict to a list of lines to match
        # file output
        expected_file_headers = sorted(self.expected_assignments1.keys())

        actual_return_value = assign_taxonomy(
            self.test_input1, min_confidence=0.95, output_fp=output_fp)

        # Read through a context manager so the handle is closed before the
        # file is deleted.
        with open(output_fp) as output_file:
            actual_file_output = sorted(output_file)

        # remove the output_fp before running the tests, so if they
        # fail the output file is still cleaned-up
        remove(output_fp)

        # None return value on write to file
        self.assertEqual(actual_return_value, None)

        # check that each line starts with the correct seq_id -- not
        # checking the taxonomies or confidences here as these are variable and
        # tested elsewhere
        # NOTE: zip stops at the shorter sequence, so a difference in the
        # number of lines is not detected by this loop.
        for actual, expected in zip(actual_file_output,
                                    expected_file_headers):
            self.assertTrue(actual.startswith(expected))

    def test_train_rdp_classifier(self):
        results = train_rdp_classifier(
            self.reference_file, self.taxonomy_file, self.training_dir)

        exp_file_list = sorted([
            'bergeyTrainingTree.xml',
            'genus_wordConditionalProbList.txt',
            'logWordPrior.txt',
            'RdpClassifier.properties',
            'wordConditionalProbIndexArr.txt',
        ])
        obs_file_list = sorted(listdir(self.training_dir))
        self.assertEqual(obs_file_list, exp_file_list)

        # Maps each key of the training result to the base name that is
        # substituted into the expected header template.
        autogenerated_headers = {
            'bergeyTree': 'bergeyTrainingTree',
            'probabilityList': 'genus_wordConditionalProbList',
            'wordPrior': 'logWordPrior',
            'probabilityIndex': 'wordConditionalProbIndexArr',
        }
        for result_key, basename in autogenerated_headers.items():
            obs_header = results[result_key].readline()
            exp_header = exp_training_header_template % basename
            self.assertEqual(exp_header, obs_header)

    def test_train_rdp_classifier_and_assign_taxonomy(self):
        obs = train_rdp_classifier_and_assign_taxonomy(
            self.reference_file, self.taxonomy_file, self.test_trained_input,
            min_confidence=0.80, model_output_dir=self.training_dir)
        exp = {
            'X67228': ('Bacteria;Proteobacteria;Alphaproteobacteria;'
                       'Rhizobiales;Rhizobiaceae;Rhizobium', 1.0)
        }
        self.assertEqual(obs, exp)

    def test_train_rdp_classifier_and_assign_taxonomy_no_model_output(self):
        # Same as the test above but without min_confidence or
        # model_output_dir, so the callee's defaults are used.
        obs = train_rdp_classifier_and_assign_taxonomy(
            self.reference_file, self.taxonomy_file, self.test_trained_input)
        exp = {
            'X67228': ('Bacteria;Proteobacteria;Alphaproteobacteria;'
                       'Rhizobiales;Rhizobiaceae;Rhizobium', 1.0)
        }
        self.assertEqual(obs, exp)
def get_env_variable(var_name, default=False):
    """
    Look up a setting in the process environment, falling back to a .env file.

    The value is taken from ``os.environ`` when present. Otherwise the file
    named by the ``PROJECT_ENV_FILE`` environment variable (or
    ``PROJECT_ROOT + "/.env"`` when that is unset) is parsed as
    ``KEY = VALUE`` lines, one matching pair of surrounding quotes is removed
    from the value, and the result is stored in ``os.environ`` before being
    returned.

    :param var_name: Environment Variable to lookup
    :param default: value returned when the variable is found in neither
        place; ``False`` (the default) means "no default", in which case an
        exception is raised instead
    :return: the variable's value as a string, or ``default``
    :raises ImproperlyConfigured: when the variable is missing from both the
        environment and the file (or the file cannot be read) and no default
        was supplied. Errors raised by configparser while parsing the file
        are not handled here and propagate unchanged.
    """
    try:
        return os.environ[var_name]
    except KeyError:
        pass

    import configparser

    # Only fall back to PROJECT_ROOT when no explicit file was configured.
    env_file = os.environ.get('PROJECT_ENV_FILE')
    if env_file is None:
        env_file = PROJECT_ROOT + "/.env"

    try:
        with open(env_file) as env_handle:
            contents = env_handle.read()
        # The file has no section headers of its own, so a dummy [DATA]
        # section is prepended to make it parseable by configparser.
        parser = configparser.RawConfigParser()
        parser.read_string("[DATA]\n" + contents)
        # configparser lower-cases option names, hence the lower() here.
        value = dict(parser.items('DATA'))[var_name.lower()]
    except (KeyError, IOError):
        if default is not False:
            return default
        from django.core.exceptions import ImproperlyConfigured
        error_msg = "Either set the env variable '{var}' or place it in your " \
                    "{env_file} file as '{var} = VALUE'"
        raise ImproperlyConfigured(error_msg.format(var=var_name,
                                                    env_file=env_file))

    # Strip one pair of matching quotes; a lone quote character is not a
    # quoted string and is left untouched.
    if len(value) >= 2 and value[0] == value[-1] and value[0] in "\"'":
        value = value[1:-1]

    # Cache the value so later lookups are served from the environment.
    os.environ.setdefault(var_name, value)
    return value