def __parse_metadata(self):
    """Fill self.meta with per-column statistics: count, five-number
    summary (min/q25/q50/q75/max), mean and population standard deviation."""
    self.meta.init(self.col)
    for col in range(self.col):
        values = self.data_sorted[col]
        # column size (stored as float, matching the meta container's convention)
        self.meta.count[col] = float(len(values))
        # quartile extraction requires sorted data
        values.sort()
        # five-number summary via the md.quartile helper
        self.meta.min[col] = md.quartile(values, 0)
        self.meta.q25[col] = md.quartile(values, 0.25)
        self.meta.q50[col] = md.quartile(values, 0.5)
        self.meta.q75[col] = md.quartile(values, 0.75)
        self.meta.max[col] = md.quartile(values, 1)
        # arithmetic mean, guarded against empty columns
        for v in values:
            self.meta.mean[col] += v
        if values:
            self.meta.mean[col] /= len(values)
        # population standard deviation around the mean just computed
        for v in values:
            self.meta.std[col] += (self.meta.mean[col] - v) ** 2
        if values:
            self.meta.std[col] /= len(values)
        self.meta.std[col] = math.sqrt(self.meta.std[col])
def addLink(form, userId):
    """Insert a new "link" row for *userId* built from the submitted *form*.

    A "url" field is mandatory (its md5 is stored as url_hash); title,
    description and typeName are copied when present.  Returns the kwargs
    dict that was inserted.  Raises Exception when no url was supplied.
    """
    linkId = utils.uuid()
    kwargs = {"id": linkId, "user_id": userId}
    # `in` instead of the Python-2-only has_key(); works on both 2 and 3
    if "url" in form:
        kwargs["url"] = form["url"]
        kwargs["url_hash"] = hashlib.md5(kwargs["url"]).hexdigest()
    else:
        raise Exception("an url must be added")
    # optional fields: (form key, db column)
    for src, dst in (("title", "title"),
                     ("description", "description"),
                     ("typeName", "type_name")):
        if src in form:
            kwargs[dst] = form[src]
    kwargs["modified_at"] = utils.timeStamp()
    # FIXME use other algorithms to get icon logo
    iconUrl = utils.extractHomeUrl(kwargs["url"]) + "favicon.ico"
    conn = db.connect()
    db.insertToTable(conn, "link", **kwargs)
    MetaData.addIconUrl(linkId, iconUrl)
    return kwargs
def __init__(self):
    """Create an empty peer; networking fields are populated later
    (presumably by start_client()/start_file_server() -- verify)."""
    self.id_num = None                     # peer identifier; set externally -- TODO confirm
    self.meta_data = MetaData("this", [])  # files this peer currently shares
    self.download_folder = None
    self.create_downloads_folder()
    self.peers = dict()                    # neighbour proxies keyed by peer id
    self.messages_received = dict()        # message id -> sender info
    self.messages_sent = dict()            # message ids this peer originated
    self.download_queue = Queue.Queue()    # pending download jobs
    #self.name_server= Pyro4.locateNS()
    self.file_server = None
    self.ip_address = None
    self.client_daemon = None
def __init__(self, file_stream):
    """Wrap *file_stream* for parsing; all data/metadata fields start empty
    (presumably filled by a later parse step -- verify against caller)."""
    self.file_stream = file_stream
    self.header = ""           # header line text
    self.col = 0               # number of columns
    self.row = 0               # number of rows
    self.data = []             # parsed rows
    self.meta = md.MetaData()  # per-column statistics container
def create_files(client_id):
    """Recreate a client's test files from its meta_data_<id>.txt manifest.

    Each manifest line is "file_name,size".  Files are written under
    test_files/ (a downloads/ folder is also ensured).  Returns a MetaData
    describing the files created.
    """
    meta_file_name = "meta_data_" + str(client_id)
    m_files = []
    # `with` + line iteration replaces manual while/readline/close and
    # guarantees the manifest is closed even on errors
    with open(meta_file_name + ".txt") as meta_file:
        for info in meta_file:
            info = info.strip("\n")
            if not info:
                # skip blank lines (the original crashed on them)
                continue
            fields = info.split(",")
            file_name = fields[0]
            size = fields[1]
            m_files.append((file_name, size))
    directory = "test_files/"
    if not os.path.exists(directory):
        os.mkdir(directory)
    if not os.path.exists("downloads/"):
        os.mkdir("downloads/")
    client_files = []
    for file_name, size in m_files:
        with open(directory + file_name, "wb+") as new_file:
            new_file.write(create_file_contents(int(size)) + "\n")
        client_files.append(FileInfo(file_name, size, time.localtime(), client_id))
    return MetaData(directory, client_files)
def _read_table(self, qtype = QTABLE):
    """Read a q table from the buffer.

    In pandas mode returns a pandas.DataFrame whose .meta attribute maps
    column names to q types; otherwise delegates to QReader._read_table.
    """
    if self._options.pandas:
        self._buffer.skip() # ignore attributes
        self._buffer.skip() # ignore dict type stamp
        columns = self._read_object()
        data = self._read_object()
        odict = OrderedDict()
        meta = MetaData(qtype = QTABLE)
        for i in range(len(columns)):
            # column names may arrive as bytes; normalize to str
            column_name = columns[i] if isinstance(columns[i], str) else columns[i].decode("utf-8")
            if isinstance(data[i], str):
                # convert character list (represented as string) to numpy representation
                meta[column_name] = QSTRING
                odict[column_name] = pandas.Series(list(data[i]), dtype = numpy.str).replace(b' ', numpy.nan)
            elif isinstance(data[i], bytes):
                # convert character list (represented as string) to numpy representation
                meta[column_name] = QSTRING
                odict[column_name] = pandas.Series(list(data[i].decode()), dtype = numpy.str).replace(b' ', numpy.nan)
            elif isinstance(data[i], (list, tuple)):
                # general (mixed) list column: keep elements in an object array
                meta[column_name] = QGENERAL_LIST
                tarray = numpy.ndarray(shape = len(data[i]), dtype = numpy.dtype('O'))
                for j in range(len(data[i])):
                    tarray[j] = data[i][j]
                odict[column_name] = tarray
            else:
                # typed vector column: propagate the qtype carried on its meta
                meta[column_name] = data[i].meta.qtype
                odict[column_name] = data[i]
        df = pandas.DataFrame(odict)
        df.meta = meta
        return df
    else:
        return QReader._read_table(self, qtype = qtype)
def create_files(client_id):
    """Generate up to max_num_files random test files under test_files/.

    Names are drawn from the module-level file_names list and sizes from
    [1, max_file_size].  Returns a MetaData describing the created files.
    """
    client_files = []
    directory = "test_files/"
    last_name_index = len(file_names) - 1
    for _ in range(1, max_num_files + 1):
        random_size = random.randint(1, max_file_size)
        file_name = file_names[random.randint(0, last_name_index)]
        client_files.append(FileInfo(file_name, random_size))
        # `with` guarantees the handle is closed; also avoids shadowing the
        # `file` builtin as the original did
        with open(directory + file_name + ".txt", "wb+") as out:
            out.write(create_file_contents(random_size) + "\n")
    return MetaData(directory, client_files)
def __init__(self):
    """Initialize an empty peer client; network-related fields stay None
    until the client is started (presumably via start_client() -- verify)."""
    self.id_num = None                    # peer identifier; set externally -- TODO confirm
    self.meta_data = MetaData("this",[])  # files this peer currently shares
    self.download_folder = None
    self.create_downloads_folder()
    self.peers = dict()                   # neighbour proxies keyed by peer id
    self.messages_received = dict()       # message id -> sender info
    self.messages_sent = dict()           # message ids this peer originated
    self.download_queue = Queue.Queue()   # pending download jobs
    #self.name_server= Pyro4.locateNS()
    self.file_server = None
    self.ip_address = None
    self.client_daemon = None
def parseDocument(self, jdomDoc):
    """Build self.metaData from the root element of the parsed XML document:
    name/date attributes, predicates, dataset descriptions and conditions."""
    root = jdomDoc.rootElement
    name = root.getAttribute("name").value
    date = root.getAttribute("date").value
    self.metaData = MetaData(name, date)
    self.metaData.setPredicates(self.parsePredicates(root))
    self.metaData.setDataSetDescriptions(self.parseDataSetDescriptions(root))
    # variableDefinitions = self.parseVariableDefinitions (root)
    # self.metaData.setVariableDefinitions (variableDefinitions)
    conditions = self.parseConditions(root)
    for condition in conditions:
        self.metaData.addCondition(condition)
def initMetaData(self):
    """Compute the file's MD5, size and block count, then serialize a
    MetaData object to a METADATA pickle next to the file and register it
    in self.fileList."""
    print('计算MD5...')  # "computing MD5..."
    MD5 = IOUtils.getMD5(self.path)
    print(MD5)
    fileSize = os.path.getsize(self.path)
    fileName = os.path.basename(self.path)
    # Files smaller than 100 MB are not split into blocks
    if fileSize < ConversionUtils.megabytes2Bytes(100):
        self.blockNum = 0
    else:
        self.blockNum = IOUtils.getPartionBlockNum(self.path, self.blockSize)
    metadata = MetaData(fileSize, fileName, MD5, self.blockNum)
    # METADATA pickle lives in the same directory as the source file
    self.metadataPath = os.path.dirname(self.path) + os.sep + 'METADATA'
    IOUtils.serializeObj2Pkl(metadata, self.metadataPath)
    self.fileList.append(self.metadataPath)
    print('元数据初始化完毕')  # "metadata initialization complete"
def _read_list(self, qtype):
    """Read a q list; in pandas mode wrap it in a pandas.Series (q nulls
    replaced with NaN for non-temporal/non-symbol types) tagged with meta."""
    if self._options.pandas:
        self._options.numpy_temporals = True
    # renamed from `list` -- the original shadowed the builtin
    qlist = QReader._read_list(self, qtype = qtype)
    if self._options.pandas:
        if -abs(qtype) not in [QMONTH, QDATE, QDATETIME, QMINUTE, QSECOND, QTIME, QTIMESTAMP, QTIMESPAN, QSYMBOL]:
            # map the q null for this type onto NaN
            null = QNULLMAP[-abs(qtype)][1]
            ps = pandas.Series(data = qlist).replace(null, numpy.NaN)
        else:
            ps = pandas.Series(data = qlist)
        ps.meta = MetaData(qtype = qtype)
        return ps
    else:
        return qlist
def check_updates(self, type_, path):
    '''check whether updates are available

    Returns True when the fetched metadata for the component at *path*
    advertises a newer, compatible version than the local copy.'''
    name = os.path.basename(path)
    meta = self.fetch_metadata(type_, name)
    # no usable remote metadata -> nothing to update to
    if not meta or not meta.get('version') or not meta.get('required emesene version'):
        return False
    local_meta = MetaData.get_metadata_from_path(path)
    # no usable local metadata -> assume the remote copy is an update
    if not local_meta or not local_meta.get('required emesene version'):
        return True
    # remote requires a newer emesene than this one -> incompatible, skip
    if self.version_value(meta.get('required emesene version')) > \
        self.version_value(Info.EMESENE_VERSION):
        return False
    # local copy has no recorded version -> treat remote as newer
    if not local_meta.get('version'):
        return True
    if self.version_value(meta.get('version')) > self.version_value(local_meta.get('version')):
        return True
    return False
def check_updates(self, type_, path):
    '''check whether updates are available

    True when the remote metadata advertises a newer, compatible version
    than the local metadata at *path*; False otherwise.'''
    component = os.path.basename(path)
    remote = self.fetch_metadata(type_, component)
    # unusable remote metadata: nothing to update to
    if not remote or not remote.get('version') or not remote.get(
            'required emesene version'):
        return False
    local = MetaData.get_metadata_from_path(path)
    # unusable local metadata: assume the remote copy is newer
    if not local or not local.get('required emesene version'):
        return True
    # incompatible: the update needs a newer emesene than this one
    required = self.version_value(remote.get('required emesene version'))
    if required > self.version_value(Info.EMESENE_VERSION):
        return False
    # local copy has no recorded version: treat remote as newer
    if not local.get('version'):
        return True
    # finally, a plain version comparison decides
    return self.version_value(remote.get('version')) > self.version_value(
        local.get('version'))
"""Create a MySQL table via SQLAlchemy, intended to receive a pandas frame."""
import pandas as pd
# fixed: `From` -> `from` (keywords are case sensitive -- the original was a SyntaxError)
from sqlalchemy import create_engine, MetaData, TEXT, Integer, Float, DateTime, Table, Column

#############################
# The first step is to create an engine
# Use `mysqlconnector` as driver program, others include `mysqldb` etc.
# Here, use `root` user, other users can be used, check mysql users via
# `SELECT user FROM mysql` in mysql terminal
# `password` is the password for the user
# `@localhost` represents the server, here we use local server, default port is
# 3306
# `dbname` is the database name
# fixed: create_engin -> create_engine
engine = create_engine("mysql+mysqlconnector://root:" + "password" + "@localhost/dbname")
meta = MetaData(bind=engine)  # create metadata for the database

#############################
# Then, we assume data is stored in `df1` (which is a pd.DataFrame), we want to
# store it in the database `example`
# First we create the SQLAlchemy Table describing the structure
# (fixed: the original called table_1.drop(engine) before table_1 existed,
# and the Table(...) call was never closed)
table_1 = Table('table1', meta,
                Column('ID', Integer, primary_key=True, autoincrement=False),
                Column('date', DateTime, nullable=False),
                Column('value', Float, nullable=True),
                extend_existing=True)
# Then drop any existing copy of the table before re-creating/loading it
table_1.drop(engine)
class MetaDataXmlParser:
    """Parse an experiment meta-data XML document (via jdom/Jython) into a
    MetaData object holding predicates, dataset descriptions and conditions.

    Documents may come from the web (http://), the local filesystem
    (file://) or an indirect http fetch (httpIndirect://)."""

    def __init__(self, filename):
        # remember the source so relative URIs can be resolved later
        self.originalFilename = filename
        builder = jdom.input.SAXBuilder()
        if filename.find("http://") == 0:
            doc = builder.build(filename)
            self.documentSource = "web"
        elif filename.find("file://") == 0:
            filename = filename[7:]
            doc = builder.build(io.FileInputStream(filename))
            self.documentSource = "localFile"
        elif filename.find("httpIndirect://") == 0:
            # indirect fetch: download the text, dump it to a scratch file,
            # then parse that local copy
            reader = TextHttpIndirectFileReader(filename)
            reader.read()
            text = reader.getText()
            filename = "junk99.xml"
            f = open(filename, "w")
            f.write(text)
            f.close()
            doc = builder.build(io.FileInputStream(filename))
            self.documentSource = "localFile"
        else:
            raise NameError, "unrecognized data source protocol in filename: %s" % filename
        self.parseDocument(doc)

    # ------------------------------------------------------------------------
    def parseDocument(self, jdomDoc):
        """Build self.metaData from the document's root element."""
        root = jdomDoc.rootElement
        name = root.getAttribute("name").value
        date = root.getAttribute("date").value
        self.metaData = MetaData(name, date)
        self.metaData.setPredicates(self.parsePredicates(root))
        self.metaData.setDataSetDescriptions(self.parseDataSetDescriptions(root))
        # variableDefinitions = self.parseVariableDefinitions (root)
        # self.metaData.setVariableDefinitions (variableDefinitions)
        conditions = self.parseConditions(root)
        for condition in conditions:
            self.metaData.addCondition(condition)

    # ------------------------------------------------------------------------
    def getDataFileUri(self):
        # NOTE(review): self.dataFileUri is never assigned in this class -- verify
        return self.dataFileUri

    # ------------------------------------------------------------------------
    def getMetaData(self):
        return self.metaData

    # ------------------------------------------------------------------------
    def parsePredicates(self, root):
        """Return {category: value} for every <predicate> child of root."""
        result = {}
        children = root.getChildren("predicate")
        for child in children:
            category = child.getAttribute("category").value
            value = child.getAttribute("value").value
            result[category] = value
        return result

    # ------------------------------------------------------------------------
    def parseDataSetDescriptions(self, root):
        """Return a list of DataSetDescription built from <dataset> children."""
        result = []
        children = root.getChildren("dataset")
        for child in children:
            status = child.getAttribute("status").value
            type = child.getAttribute("type").value
            uri = child.getChild("uri").textTrim
            dataSetDescription = DataSetDescription(uri, status, type)
            result.append(dataSetDescription)
        return result

    # ------------------------------------------------------------------------
    def createFullUrlFromRelativeFilename(self, filename):
        """Resolve *filename* against the base URL of the original document
        (which must have been fetched over http)."""
        assert self.originalFilename.count("http://") == 1
        tokens = self.originalFilename.split("/")
        assert len(tokens) > 3
        max = len(tokens)
        baseUrl = ""
        for t in range(max - 1):
            baseUrl += "%s/" % tokens[t]
        return "%s%s" % (baseUrl, filename)

    # ------------------------------------------------------------------------
    def parseOrganism(self, root):
        """Return (species, strain) from the <organism> element."""
        organismElement = root.getChild("organism")
        species = organismElement.getChild("species").textTrim
        strain = organismElement.getChild("strain").textTrim
        return (species, strain)

    # ------------------------------------------------------------------------
    def parsePredications(self, root):
        """ return a hash, keyed on predication name
        for instance
          <predicate category='species' value='Halobacterium NRC-1'/>
          <predicate category='strain' value='afsQ2 knockout'/>
          <predicate category='perturbation' value='genetic'/>
        becomes one element in a hash:
          {'species': 'Halobacterium NRC-1',
           'strain': 'afsQ2 knockout',
           'perturbation': 'genetic'}
        """
        predicationElements = root.getChildren("predicate")
        result = {}
        for p in predicationElements:
            name = p.getAttribute("category").value
            # NOTE(review): this reads a <value> child element, while the
            # docstring above describes a value *attribute* -- confirm which
            # is correct against the XML schema
            value = p.getChild("value")
            result[name] = value
        return result

    # ------------------------------------------------------------------------
    def parseVariableDefinitions(self, root):
        """ return a hash keyed by definition name, where each hash value is
        a (nested) hash, containing all element attributes, and an array of
        strings caputring the range of values permitted in variables of this
        type.
        for example:
          <variableDefinition name='time' units='minutes'>
            <value>0</value>
            <value>30</value>
            <value>60</value>
            <value>90</value>
          </variableDefinition>
        becomes
          {'time': {'values': ['0', '30', '60', '90'],
                    'units': 'minutes', 'name': 'time'}}
        note that the 'name' attribute is plucked out and used as the
        definition's key, but also appears in the full list.
        """
        variableDefinitionElements = root.getChildren("variableDefinition")
        result = {}
        for definition in variableDefinitionElements:
            defHash = {}  # eg, {'name':'time', 'units'='minutes', 'values'=[0,30,60,90]
            variableDefitionName = None
            # copy every attribute of the element into the hash
            for attribute in definition.getAttributes():
                name = attribute.name
                value = attribute.value
                defHash[name] = value
            valueElements = definition.getChildren("value")
            values = []
            for v in valueElements:
                values.append(v.textTrim)
            assert len(values) > 0
            defHash["values"] = values
            # the 'name' attribute doubles as the result key
            assert defHash.has_key("name")
            defName = defHash["name"]
            result[defName] = defHash
        # print 'p: %s' % result
        return result

    # ------------------------------------------------------------------------
    def parseConditions(self, root):
        """ return an array of Condition objects
        for example, from
          <condition alias='C30'>
            <variable name='gamma irradiation' value='false'/>
            <variable name='time' value='30' units='minutes'/>
          </condition>
          <condition alias='G0'>
            <variable name='gamma irradiation' value='true'/>
            <variable name='time' value='0' units='minutes'/>
          </condition>
        string representation of the two resulting condition objects:
        [note that no units ('None') are specified for irradiation]
          condition: C30, gamma irradiation: false, None, time: 30, minutes
          condition: G0 gamma irradiation: true, None time: 0, minutes
        """
        conditionElements = root.getChildren("condition")
        result = []  # use an array so that conditions can be stored in the
                     # order in which they appear in the file, which may
                     # reflect an order the user carefully created
        for c in conditionElements:
            alias = c.getAttribute("alias").value
            assert alias
            condition = Condition(alias)
            for v in c.getChildren("variable"):
                name = v.getAttribute("name").value
                value = v.getAttribute("value").value
                # units are optional on a variable
                unitsAttribute = v.getAttribute("units")
                units = None
                if unitsAttribute:
                    units = unitsAttribute.value
                condition.addVariable(Variable(name, value, units))
            result.append(condition)
        return result
class Client():
    """A P2P file-sharing peer: floods queries to neighbours, routes hit
    replies back toward the originator, and downloads files over a raw
    TCP socket from the owning peer."""

    def __init__(self):
        self.id_num = None                     # peer id; presumably assigned externally -- verify
        self.meta_data = MetaData("this", [])  # files this peer currently shares
        self.download_folder = None
        self.create_downloads_folder()
        self.peers = dict()                    # neighbour proxies keyed by peer info
        self.messages_received = dict()        # messageId -> sender info (route for hit replies)
        self.messages_sent = dict()            # messageId -> True for queries this peer originated
        self.download_queue = Queue.Queue()    # (ip, port, file_name) download jobs
        #self.name_server= Pyro4.locateNS()
        self.file_server = None
        self.ip_address = None
        self.client_daemon = None

    #### This is called from other peers, and calls this on neigbhors
    def query(self, messageId, TTL, file_name, sender_info):
        """Handle a flooded search query: dedupe, forward to neighbours
        while TTL remains, and answer with a hit if this peer has the file."""
        # drop duplicates: this message was already seen or originated here
        if self.messages_received.has_key(messageId) or self.messages_sent.has_key(messageId):
            print "Not sending query from " + str(self.ip_address)
            return
        else:
            if self.ip_address == sender_info:
                # this peer is the query's originator
                self.messages_sent[messageId] = True
            else:
                # remember who sent it so a hit can be routed back
                self.messages_received[messageId] = sender_info
            if TTL > 0:
                TTL = TTL - 1
                for peer in self.peers.values():
                    peer.query(messageId, TTL, file_name, self.ip_address)
            if self.meta_data.has_file(file_name):
                self.send_hit_query(messageId, TTL, file_name, self.ip_address)

    ##### If peer as the file, send a hit query
    def send_hit_query(self, messageId, TTL, file_name, sender_info):
        """Send a hit reply back along the path the query arrived on."""
        print "Sending a Hit for: " + file_name + " from client: " + str(self.ip_address) + " orgin: " + str(sender_info)
        peer_info = self.messages_received[messageId]
        peer = self.peers[peer_info]
        peer.hit_query(messageId, TTL, file_name, sender_info)

    #### called from peer that is relaying a query message back
    def hit_query(self, messageId, TTL, file_name, sender_info):
        """Relay a hit toward the originator, or announce it on arrival."""
        if self.messages_sent.has_key(messageId):
            # the hit reached the query's originator
            print str(sender_info) + " has the file from client" + str(self.ip_address)
            #### download file
        else:
            # keep relaying toward the originator
            self.send_hit_query(messageId, TTL, file_name, sender_info)
        pass

    #### This intiates the download process, exits if files is not on index
    def obtain(self, file_name):
        # NOTE(review): peer_with_file_id is not defined anywhere in this
        # class or method -- as written this raises NameError; verify where
        # it was meant to come from (an index lookup, presumably)
        print "peer: " + str(peer_with_file_id)
        if len(peer_with_file_id ) > 0:
            peer_uri = self.name_server.lookup(str(peer_with_file_id[0]))
            peer = Pyro4.Proxy(peer_uri)
            self.get_file(file_name, peer)
        else:
            print file_name + " is not on Index"

    #### Ask the peer for its ip information and put the job in the Queue
    def get_file(self, file_name, peer):
        """Queue a download job and start a worker thread for it."""
        peer_ip, peer_port = peer.get_addr()
        self.download_queue.put((peer_ip, peer_port, file_name))
        getter = threading.Thread(target= self.download_file)
        getter.start()
        print "Starting download thread"

    ##### This is called in a seperate Thread, pulla job from the queue and
    #####download into the test_files folder, let the index know u have it
    def download_file(self):
        """Worker: pull one job from the queue, stream the file over TCP
        into the download folder, then register it."""
        peer_ip, peer_port, file_name = self.download_queue.get()
        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        file = open(self.download_folder + file_name + ".txt", "wb+")
        try:
            print "\nConnecting to fileserver!!!!\n"
            sock.connect((peer_ip, peer_port))
            # protocol: send the requested file name terminated by newline
            sock.sendall(file_name + "\n")
            while 1:
                file_data = sock.recv(1024)
                if not file_data:
                    print "\nData was empty"
                    break
                else:
                    print file_data
                    file.write(file_data)
            # NOTE(review): self.server is never assigned in this class -- verify
            self.server.add_file_to_index(self.id_num, file_name)
            self.meta_data.add_file(file_name)
        finally:
            file.close()
            sock.close()

    def add_peer(self, peer_id, peer):
        self.peers[peer_id] = peer

    def get_addr(self):
        # NOTE(review): reads self.ip / self.port, which __init__ never sets
        # (only self.ip_address is assigned) -- verify
        return (self.ip, self.port)

    def delete_file(self, file_name):
        self.meta_data.remove_file(file_name)
        #delete from disk

    def set_meta_data(self, meta_data):
        self.meta_data = meta_data

    def add_file(self, file_name):
        self.meta_data.add_file(file_name)

    def list_files_on_index(self):
        pass

    ###### Helper functions ######
    #### This is started in a seperate thread ####
    def register_with_servers(self):
        """Start the Pyro daemon and file server, register, then serve."""
        self.client_daemon = Pyro4.Daemon()
        self.start_file_server()
        self.register_to_naming_server()
        self.client_daemon.requestLoop()

    def register_to_naming_server(self):
        # NOTE(review): self.name_server is only created by the commented-out
        # line in __init__ -- verify it is set before this runs
        client_uri = self.client_daemon.register(self)
        ip, port = self.ip_address
        self.name_server.register(str(ip) + str(port), client_uri)

    def start_file_server(self):
        self.file_server = FileServer.FileServer(self)
        self.ip_address = self.file_server.start_server()

    def start_client(self):
        """Spin up the daemon/registration work on a background thread."""
        daemon_thread = threading.Thread(target=self.register_with_servers)
        daemon_thread.start()
        print "returning from start"

    def stop_client(self):
        self.file_server.stop_server()
        self.client_daemon.shutdown()
        print "Stopping client: " + str(self.id_num)
#!/usr/bin/env python from django.core.management import setup_environ import os, sys, subprocess sys.path.append(os.path.join(os.path.dirname(__file__), 'tacc_stats_site')) sys.path.append(os.path.join(os.path.dirname(__file__), 'stampede')) import settings setup_environ(settings) import stampede.views as views import stampede.sys_path_append as sys_path_append import MetaData path = sys_path_append.pickles_dir date_str = sys.argv[1] for date in os.listdir(path): date_str = subprocess.check_output( ['date', '--date', date_str, '+%Y-%m-%d']) #if date.strip() != date_str.strip(): continue if not '2013-11-' in date: continue print 'Run update for', date meta = MetaData.MetaData(os.path.join(path, date)) #if os.path.exists(meta.meta_path): continue meta.load_update() print 'Number of pickle files to upload into DB', len(meta.json.keys()) views.update(meta=meta)
class Client():
    """A P2P file-sharing peer: floods search queries to neighbours, routes
    hit replies back toward the originator, and downloads files over a raw
    TCP socket from the peer that owns them."""

    def __init__(self):
        self.id_num = None                     # peer id; presumably assigned externally -- verify
        self.meta_data = MetaData("this", [])  # files this peer currently shares
        self.download_folder = None
        self.create_downloads_folder()
        self.peers = dict()                    # neighbour proxies keyed by peer info
        self.messages_received = dict()        # messageId -> sender info (route for hit replies)
        self.messages_sent = dict()            # messageId -> True for queries this peer originated
        self.download_queue = Queue.Queue()    # (ip, port, file_name) download jobs
        #self.name_server= Pyro4.locateNS()
        self.file_server = None
        self.ip_address = None
        self.client_daemon = None

    #### This is called from other peers, and calls this on neigbhors
    def query(self, messageId, TTL, file_name, sender_info):
        """Handle a flooded search query: dedupe, forward while TTL remains,
        and answer with a hit if this peer has the file."""
        # drop duplicates: this message was already seen or originated here
        if self.messages_received.has_key(
                messageId) or self.messages_sent.has_key(messageId):
            print "Not sending query from " + str(self.ip_address)
            return
        else:
            if self.ip_address == sender_info:
                # this peer is the query's originator
                self.messages_sent[messageId] = True
            else:
                # remember who sent it so a hit can be routed back
                self.messages_received[messageId] = sender_info
            if TTL > 0:
                TTL = TTL - 1
                for peer in self.peers.values():
                    peer.query(messageId, TTL, file_name, self.ip_address)
            if self.meta_data.has_file(file_name):
                self.send_hit_query(messageId, TTL, file_name, self.ip_address)

    ##### If peer as the file, send a hit query
    def send_hit_query(self, messageId, TTL, file_name, sender_info):
        """Send a hit reply back along the path the query arrived on."""
        print "Sending a Hit for: " + file_name + " from client: " + str(
            self.ip_address) + " orgin: " + str(sender_info)
        peer_info = self.messages_received[messageId]
        peer = self.peers[peer_info]
        peer.hit_query(messageId, TTL, file_name, sender_info)

    #### called from peer that is relaying a query message back
    def hit_query(self, messageId, TTL, file_name, sender_info):
        """Relay a hit toward the originator, or announce it on arrival."""
        if self.messages_sent.has_key(messageId):
            # the hit reached the query's originator
            print str(sender_info) + " has the file from client" + str(
                self.ip_address)
            #### download file
        else:
            # keep relaying toward the originator
            self.send_hit_query(messageId, TTL, file_name, sender_info)
        pass

    #### This intiates the download process, exits if files is not on index
    def obtain(self, file_name):
        # NOTE(review): peer_with_file_id is not defined anywhere in this
        # class or method -- as written this raises NameError; verify where
        # it was meant to come from (an index lookup, presumably)
        print "peer: " + str(peer_with_file_id)
        if len(peer_with_file_id) > 0:
            peer_uri = self.name_server.lookup(str(peer_with_file_id[0]))
            peer = Pyro4.Proxy(peer_uri)
            self.get_file(file_name, peer)
        else:
            print file_name + " is not on Index"

    #### Ask the peer for its ip information and put the job in the Queue
    def get_file(self, file_name, peer):
        """Queue a download job and start a worker thread for it."""
        peer_ip, peer_port = peer.get_addr()
        self.download_queue.put((peer_ip, peer_port, file_name))
        getter = threading.Thread(target=self.download_file)
        getter.start()
        print "Starting download thread"

    ##### This is called in a seperate Thread, pulla job from the queue and
    #####download into the test_files folder, let the index know u have it
    def download_file(self):
        """Worker: pull one job from the queue, stream the file over TCP
        into the download folder, then register it."""
        peer_ip, peer_port, file_name = self.download_queue.get()
        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        file = open(self.download_folder + file_name + ".txt", "wb+")
        try:
            print "\nConnecting to fileserver!!!!\n"
            sock.connect((peer_ip, peer_port))
            # protocol: send the requested file name terminated by newline
            sock.sendall(file_name + "\n")
            while 1:
                file_data = sock.recv(1024)
                if not file_data:
                    print "\nData was empty"
                    break
                else:
                    print file_data
                    file.write(file_data)
            # NOTE(review): self.server is never assigned in this class -- verify
            self.server.add_file_to_index(self.id_num, file_name)
            self.meta_data.add_file(file_name)
        finally:
            file.close()
            sock.close()

    def add_peer(self, peer_id, peer):
        self.peers[peer_id] = peer

    def get_addr(self):
        # NOTE(review): reads self.ip / self.port, which __init__ never sets
        # (only self.ip_address is assigned) -- verify
        return (self.ip, self.port)

    def delete_file(self, file_name):
        self.meta_data.remove_file(file_name)
        #delete from disk

    def set_meta_data(self, meta_data):
        self.meta_data = meta_data

    def add_file(self, file_name):
        self.meta_data.add_file(file_name)

    def list_files_on_index(self):
        pass

    ###### Helper functions ######
    #### This is started in a seperate thread ####
    def register_with_servers(self):
        """Start the Pyro daemon and file server, register, then serve."""
        self.client_daemon = Pyro4.Daemon()
        self.start_file_server()
        self.register_to_naming_server()
        self.client_daemon.requestLoop()

    def register_to_naming_server(self):
        # NOTE(review): self.name_server is only created by the commented-out
        # line in __init__ -- verify it is set before this runs
        client_uri = self.client_daemon.register(self)
        ip, port = self.ip_address
        self.name_server.register(str(ip) + str(port), client_uri)

    def start_file_server(self):
        self.file_server = FileServer.FileServer(self)
        self.ip_address = self.file_server.start_server()

    def start_client(self):
        """Spin up the daemon/registration work on a background thread."""
        daemon_thread = threading.Thread(target=self.register_with_servers)
        daemon_thread.start()
        print "returning from start"

    def stop_client(self):
        self.file_server.stop_server()
        self.client_daemon.shutdown()
        print "Stopping client: " + str(self.id_num)