def build_message_list_from_all_files():
    # Collect translatable text from every CSV under data/csv, skipping
    # quoted fields, email addresses and URLs, then write the deduplicated,
    # sorted list to data/translations/message_list.json. (Python 2 code.)
    message_list = []
    from utils import get_file_data
    for fname in os.listdir(os.path.join("data", "csv")):
        if not fname.startswith("agmark"):
            data = get_file_data(fname) or []
            for row in data:
                for col in row:
                    # note: the unescaped dots in ".com$" / ".in$" match any character
                    if col and re.search('[a-zA-Z]+', col) \
                        and not col.startswith('"') \
                        and not re.search(r".+@.+\.", col) \
                        and not re.search(r"www\.", col) \
                        and not re.search(".com$", col) \
                        and not re.search(".in$", col):
                        message_list.append(col.strip())
            print fname
            #if len(message_list) > 100: break

    # de-duplicate and sort before dumping
    message_list = list(set(message_list))
    message_list.sort()
    with open(os.path.join("data", "translations", "message_list.json"), "w") as message_list_file:
        message_list_file.write(
            json.dumps(message_list, indent=1, sort_keys=True).encode("utf-8"))
def add_values(fname, fpath):
    headers, data = utils.get_file_data(fpath)
    if not data:
        return
    if not headers["title"]:
        return

    add_regions(data)

    # normalize the title using the exclude_from_headers replacement map
    for key, value in exclude_from_headers.iteritems():
        headers["title"] = headers["title"].replace(key, value)

    # slug the (truncated) title into a document name,
    # e.g. "State Wise Rainfall" -> "state-wise-rainfall"
    data_set = headers["title"][:170].replace(" ", "-").lower()
    if not webnotes.conn.exists("Data Set", data_set):
        try:
            webnotes.bean({
                "doctype": "Data Set",
                "name": data_set,
                "title": headers["title"],
                "description": headers["description"],
                "raw_filename": fname,
                "url": headers["url"],
                "source": "data.gov.in",
                "row_count": len(data),
                "__islocal": 1
            }).save()
        except MySQLdb.IntegrityError:
            # name collision despite the exists() check; skip the duplicate
            pass
def get_args(form_dict):
    properties.load_properties()
    file_properties = properties.properties[form_dict["fname"]]
    file_data = utils.get_file_data(form_dict["fname"])
    group = file_properties.get("groups", ["Other"])[0]
    chart_type = file_properties.get("chart_type") or "Line"
    args = {
        "file_data": file_data,
        "properties": file_properties,
        "chart_type": chart_type,
        "group": group,
        "group_info": properties.groups[group],
        "consolelog": consolelog,
        "json": json,
        "len": len,
        "title": file_properties.get("title", form_dict["fname"]),
        "description": file_properties.get("description", form_dict["fname"])
    }
    if chart_type == "Map":
        args["map_data"] = get_map_data(file_data, file_properties)
        args["legend"] = file_properties.get("legend") or ""
    else:
        args["chart_data"] = get_chart_data(file_data, file_properties, chart_type)
    return args
def huffman(filename: str, output_filename: str):
    code = utils.get_huffman_code(utils.get_file_data(filename))
    header = make_code_header(code)
    encode(filename, output_filename, code, header)
    return {
        "entropy": utils.entropy(output_filename),
        "average_encoding": sum(len(x) for x in code.values()) / len(code),
        "compression": utils.compression(filename, output_filename),
    }
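# A minimal usage sketch for the function above. The file names are
# hypothetical, and it assumes the utils/make_code_header/encode helpers
# this module relies on are importable:
if __name__ == "__main__":
    stats = huffman("corpus.txt", "corpus.huff")
    print(f"compression ratio: {stats['compression']}")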
def writetemplate(template, proxy, output_file, rulesfile=None):
    domains_content = final_list(rulesfile)
    proxy_content = get_file_data(template)
    proxy_content = proxy_content.replace('__PROXY__', proxy)
    proxy_content = proxy_content.replace('__DOMAINS__', domains_content)
    with open(output_file, 'w') as file_obj:
        file_obj.write(proxy_content)
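# A hypothetical usage sketch (all paths and the proxy string are examples,
# not taken from the original code):
#
#   writetemplate("pac.template", "PROXY 127.0.0.1:8118",
#                 "proxy.pac", rulesfile="rules.txt")
#
# which substitutes __PROXY__ and __DOMAINS__ in pac.template and writes
# the filled-in result to proxy.pac.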
def set_properties():
    global properties
    load_properties()
    for fname in os.listdir(os.path.join("data", "csv")):
        if not fname.startswith("."):
            data = get_file_data(fname)
            #data = None
            set_property_for(fname, data)
    save_properties()
    make_group_datasets()
def make_word_count():
    # import once, outside the loop
    from webnotes.utils import get_path
    for ds in webnotes.conn.sql(
            """select name, raw_filename from `tabData Set`""", as_dict=1):
        if ds.raw_filename:
            headers, data = utils.get_file_data(
                get_path("app", "downloads", "data.gov.in", ds.raw_filename))
            webnotes.conn.set_value("Data Set", ds.name, "row_count", len(data))
            # persist each update immediately
            webnotes.conn.commit()
def config(self, args):
    _ip = args.get('ip')
    content = get_file_data(options.openssl_conf)
    # swap the first "IP:<addr>" value in openssl.cnf for the requested one
    replaced = re.findall(r'.*IP:(.*)\n?', content)[0]
    content = content.replace(replaced, _ip)
    set_file_data(options.openssl_conf, content)
    logging.info('generate .key and .crt file')
    os.system(options.openssl_cmd)
    self.backup_key_and_ca()
    return {"message": "config openssl.cnf successfully"}
def new_config(self, args):
    ip = args.get('ip')
    content = get_file_data(options.openssl_conf)
    # isolate the [ v3_ca ] section and rewrite its subjectAltName line
    target = re.findall(r'\[ v3_ca \]([\s\S]*?)\[', content)[0]
    replaced = re.findall(r'(.*subjectAltName=.*)', target)[0]
    new_replaced = "subjectAltName=IP:%s\n" % ip
    new_target = target.replace(replaced, new_replaced)
    content = content.replace(target, new_target)
    set_file_data(options.openssl_conf, content)
    logging.info('config openssl success')
    os.system(options.openssl_cmd)
    logging.info('generate .key and .crt file')
    self.backup_key_and_ca()
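# For reference, the regexes above target an openssl.cnf fragment shaped
# roughly like this (the IP value is illustrative, not from the original):
#
#   [ v3_ca ]
#   subjectAltName=IP:192.168.1.100
#
#   [ next_section ]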
def config(self, args):
    logging.info('config args:%s' % str(args))
    _ip = args.get('ip')
    _path = args.get('paths')
    content = get_file_data(options.logstash_forwarder_conf)
    ip_replaced = re.findall(r'.*servers": \[ "(.*):5043', content)[0]
    path_replaced = re.findall(r'"paths": \[ "(.*)" ].*', content)[0]
    logging.info('str_ip :%s, str_path :%s' % (ip_replaced, path_replaced))
    content = content.replace(ip_replaced, _ip)
    logging.info('content:%s' % content)
    content = content.replace(path_replaced, _path)
    logging.info('content:%s' % content)
    set_file_data(options.logstash_forwarder_conf, content)
    return {"message": "config /etc/logstash-forwarder.conf successfully"}
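# For reference, the two regexes above expect /etc/logstash-forwarder.conf
# to contain JSON shaped roughly like this (host and path are illustrative):
#
#   "network": { "servers": [ "10.0.0.1:5043" ] },
#   "files": [ { "paths": [ "/var/log/messages" ] } ]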
def process_file(fpath):
    sys.stdout.write(".")
    sys.stdout.flush()
    headers, data = utils.get_file_data(os.path.basename(fpath))
    if data and headers:
        # dataset
        db.insert_dataset({
            "name": headers["title"],
            "description": headers["description"],
            "raw_filename": headers["file_name"],
            "url": headers["url"],
            "source": "data.gov.in"
        })
        data = clean_data(data)
        set_series(headers, data)
        set_data(headers, data)