def __init__(self, file_stream):
    self.file_stream = file_stream
    self.header = ""
    self.col = 0
    self.row = 0
    self.data = []
    self.meta = md.MetaData()
def create_files(client_id):
    # peer_folder = "peer" + str(client_id) + "/"
    meta_file_name = "meta_data_" + str(client_id)
    meta_file = open(meta_file_name + ".txt")
    m_files = []
    client_files = []
    while True:
        info = meta_file.readline()
        if not info:
            break
        info = info.strip("\n").split(",")
        file_name = info[0]
        size = info[1]
        m_files.append((file_name, size))
    meta_file.close()

    directory = "test_files/"
    if not os.path.exists(directory):
        os.mkdir(directory)
    if not os.path.exists("downloads/"):
        os.mkdir("downloads/")

    for file_name, size in m_files:
        new_file = open(directory + file_name, "wb+")
        new_file.write(create_file_contents(int(size)) + "\n")
        new_file.close()
        client_files.append(FileInfo(file_name, size, time.localtime(), client_id))

    return MetaData(directory, client_files)
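# The create_file_contents helper called above is not shown in these snippets; a minimal
# sketch, assuming it only needs to return `size` characters of filler data for a test file
# (the name and behaviour are assumptions, not the project's actual implementation).
import random
import string

def create_file_contents(size):
    # build a random string of the requested length
    return "".join(random.choice(string.ascii_letters) for _ in range(size))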
def _read_table(self, qtype = QTABLE):
    if self._options.pandas:
        self._buffer.skip()  # ignore attributes
        self._buffer.skip()  # ignore dict type stamp

        columns = self._read_object()
        data = self._read_object()

        odict = OrderedDict()
        meta = MetaData(qtype = QTABLE)
        for i in range(len(columns)):
            column_name = columns[i] if isinstance(columns[i], str) else columns[i].decode("utf-8")
            if isinstance(data[i], str):
                # convert character list (represented as string) to numpy representation
                meta[column_name] = QSTRING
                odict[column_name] = pandas.Series(list(data[i]), dtype = numpy.str).replace(b' ', numpy.nan)
            elif isinstance(data[i], bytes):
                # convert character list (represented as string) to numpy representation
                meta[column_name] = QSTRING
                odict[column_name] = pandas.Series(list(data[i].decode()), dtype = numpy.str).replace(b' ', numpy.nan)
            elif isinstance(data[i], (list, tuple)):
                meta[column_name] = QGENERAL_LIST
                tarray = numpy.ndarray(shape = len(data[i]), dtype = numpy.dtype('O'))
                for j in range(len(data[i])):
                    tarray[j] = data[i][j]
                odict[column_name] = tarray
            else:
                meta[column_name] = data[i].meta.qtype
                odict[column_name] = data[i]

        df = pandas.DataFrame(odict)
        df.meta = meta
        return df
    else:
        return QReader._read_table(self, qtype = qtype)
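# A hedged usage sketch for the pandas-enabled reader above: when a qpython connection is
# opened with pandas=True, a table query comes back as a pandas.DataFrame with the per-column
# q types attached as df.meta (host, port, and the q expression below are assumptions).
from qpython import qconnection

q = qconnection.QConnection(host='localhost', port=5000, pandas=True)
q.open()
try:
    df = q('([] sym: `a`b`c; price: 1.1 2.2 3.3)')  # any q expression returning a table
    print(df.meta)  # column-to-qtype mapping built in _read_table
    print(df)
finally:
    q.close()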
def create_files(client_id):
    client_files = []
    directory = "test_files/"
    num_files_names = len(file_names) - 1
    for i in range(1, max_num_files + 1):
        random_size = random.randint(1, max_file_size)
        file_name = file_names[random.randint(0, num_files_names)]
        client_files.append(FileInfo(file_name, random_size))
        new_file = open(directory + file_name + ".txt", "wb+")
        new_file.write(create_file_contents(random_size) + "\n")
        new_file.close()
    return MetaData(directory, client_files)
def __init__(self):
    self.id_num = None
    self.meta_data = MetaData("this", [])
    self.download_folder = None
    self.create_downloads_folder()
    self.peers = dict()
    self.messages_received = dict()
    self.messages_sent = dict()
    self.download_queue = Queue.Queue()
    # self.name_server = Pyro4.locateNS()
    self.file_server = None
    self.ip_address = None
    self.client_daemon = None
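# create_downloads_folder is defined elsewhere on this class; a minimal sketch of what it is
# assumed to do -- choose a downloads directory and create it if missing (the folder name is
# an assumption based on the "downloads/" directory used elsewhere in these snippets).
def create_downloads_folder(self):
    self.download_folder = "downloads/"
    if not os.path.exists(self.download_folder):
        os.mkdir(self.download_folder)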
def initMetaData(self):
    print('Computing MD5...')
    MD5 = IOUtils.getMD5(self.path)
    print(MD5)
    fileSize = os.path.getsize(self.path)
    fileName = os.path.basename(self.path)
    # Files smaller than 100 MB are not split into blocks
    if fileSize < ConversionUtils.megabytes2Bytes(100):
        self.blockNum = 0
    else:
        self.blockNum = IOUtils.getPartionBlockNum(self.path, self.blockSize)
    metadata = MetaData(fileSize, fileName, MD5, self.blockNum)
    self.metadataPath = os.path.dirname(self.path) + os.sep + 'METADATA'
    IOUtils.serializeObj2Pkl(metadata, self.metadataPath)
    self.fileList.append(self.metadataPath)
    print('Metadata initialization complete')
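# IOUtils.serializeObj2Pkl belongs to the project's IOUtils module and is not shown here; a
# minimal sketch of what it is assumed to do -- pickle an object to the given path (the
# function body is hypothetical, only the call signature comes from the snippet above).
import pickle

def serializeObj2Pkl(obj, path):
    # write the object to disk as a pickle file
    with open(path, 'wb') as f:
        pickle.dump(obj, f)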
def _read_list(self, qtype):
    if self._options.pandas:
        self._options.numpy_temporals = True

    list = QReader._read_list(self, qtype = qtype)

    if self._options.pandas:
        if -abs(qtype) not in [QMONTH, QDATE, QDATETIME, QMINUTE, QSECOND, QTIME, QTIMESTAMP, QTIMESPAN, QSYMBOL]:
            null = QNULLMAP[-abs(qtype)][1]
            ps = pandas.Series(data = list).replace(null, numpy.NaN)
        else:
            ps = pandas.Series(data = list)

        ps.meta = MetaData(qtype = qtype)
        return ps
    else:
        return list
#!/usr/bin/env python
from django.core.management import setup_environ
import os, sys, subprocess

sys.path.append(os.path.join(os.path.dirname(__file__), 'tacc_stats_site'))
sys.path.append(os.path.join(os.path.dirname(__file__), 'stampede'))

import settings
setup_environ(settings)

import stampede.views as views
import stampede.sys_path_append as sys_path_append
import MetaData

path = sys_path_append.pickles_dir
date_str = sys.argv[1]

for date in os.listdir(path):
    date_str = subprocess.check_output(['date', '--date', date_str, '+%Y-%m-%d'])
    # if date.strip() != date_str.strip(): continue
    if '2013-11-' not in date:
        continue
    print 'Run update for', date
    meta = MetaData.MetaData(os.path.join(path, date))
    # if os.path.exists(meta.meta_path): continue
    meta.load_update()
    print 'Number of pickle files to upload into DB', len(meta.json.keys())
    views.update(meta=meta)
import pandas as pd
from sqlalchemy import create_engine, MetaData, TEXT, Integer, Float, DateTime, Table, Column

#############################
# The first step is to create an engine.
# Use `mysqlconnector` as the driver; others include `mysqldb`, etc.
# Here we use the `root` user; other users can be used as well -- check MySQL users via
# `SELECT user FROM mysql.user` in a mysql terminal.
# `password` is the password for that user.
# `@localhost` is the server; here we use the local server, whose default port is 3306.
# `dbname` is the database name.
engine = create_engine("mysql+mysqlconnector://root:" + "password" + "@localhost/dbname")
meta = MetaData(bind=engine)  # create metadata for the database

#############################
# Then, assuming the data is stored in `df1` (a pd.DataFrame), we want to store it in the database.
# First, we create a SQLAlchemy Table to specify the structure of the database table
table_1 = Table('table1', meta,
                Column('ID', Integer, primary_key=True, autoincrement=False),
                Column('date', DateTime, nullable=False),
                Column('value', Float, nullable=True),
                extend_existing=True)
# Then drop any existing copy of the table (it has to be defined before it can be dropped;
# checkfirst avoids an error when the table does not exist yet)
table_1.drop(engine, checkfirst=True)
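# A minimal sketch of the remaining steps under the assumptions above (`engine`, `meta`,
# `table_1`, and the DataFrame `df1` defined earlier): create the table in MySQL and
# bulk-load the DataFrame with pandas' to_sql.
meta.create_all(engine)           # issue CREATE TABLE for every table bound to `meta`
df1.to_sql('table1', engine,      # write df1 into the `table1` table
           if_exists='append',    # the table already exists, so append rows
           index=False)           # do not write the DataFrame index as a column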