def test():
    infile = 'data.csv'
    df = pd.read_csv(infile)
    df = df.groupby('X')
    db = Db(TEMP_DB)
    with open('script.sql', 'w') as file:
        # One temp table per group; the generated SQL is logged to script.sql.
        for table_name, data in df:
            t = Table(db, table_name, data, file, test_mode=True)
            t.prepare_temp_table()
class Control:
    def __init__(self):
        # Instance state; class-level mutables would be shared across instances.
        self.buckets = dict()
        self.table = Table()
        self.info = {"collision": 0, "overflow": 0}
        self.page_head = Page()

    def hash_function(self, text):
        # One digit per character: the first decimal digit of the fractional
        # part of sqrt(ord(char)), concatenated into a single integer.
        value = str()
        for i in text:
            char_value = math.sqrt(ord(i))
            value += str(int(math.modf(char_value)[0] * 10))
        return int(value)

    def readfile(self, file):
        page = self.page_head
        tuples = list()
        with open(file, 'r') as reader:
            for reg in reader:
                reg = reg.replace("\n", "")
                _tuple = Tuple(reg)
                tuples.append(_tuple)
                # Store the Tuple (dohash reads .text from page.register);
                # if the page is full, advance and store it on the next page
                # instead of dropping it.
                if not page.set_register(_tuple):
                    page = page.next
                    page.set_register(_tuple)
        self.table.tuples = tuples
        self.dohash()

    def dohash(self):
        page = self.page_head
        while page is not None:
            for _tuple in page.register:
                _hash = self.hash_function(_tuple.text)
                if _hash not in self.buckets:
                    bucket = Bucket()
                    bucket.add_ref(page)
                    self.buckets[_hash] = bucket
                else:
                    bucket = self.buckets[_hash]
                    self.info["collision"] += 1
                    bucket.add_ref(page)
                    if bucket.overflow:
                        self.info["overflow"] += 1
            page = page.next

    def get_info(self):
        return {
            "collision": self.info["collision"],
            "overflow": self.info["overflow"],
        }

    def search(self, text):
        cost = int()
        index = self.hash_function(text)
        bucket = self.buckets.get(index)  # .get avoids a KeyError on unknown keys
        while bucket is not None:
            for page in bucket.pages:
                for reg in page.register:
                    cost += 1
                    if reg.text == text:
                        return cost, reg.text
            bucket = bucket.next_bucket
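# A minimal, self-contained sketch of the hashing scheme used by
# Control.hash_function above, separated from the Page/Bucket machinery.
# Note the hash yields one digit per character, so its magnitude grows
# with the length of the input string. Names here are illustrative only.
import math

def demo_hash(text):
    digits = ""
    for ch in text:
        frac = math.modf(math.sqrt(ord(ch)))[0]  # fractional part of sqrt
        digits += str(int(frac * 10))            # its first decimal digit
    return int(digits)

print(demo_hash("abc"))  # 889: sqrt(97)~9.848, sqrt(98)~9.899, sqrt(99)~9.949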
def create_table(self, name: str, columns: Optional[List[TableColumnData]] = None) -> Table:
    if name in [t.name for t in self.tables]:
        raise TableCreationError(name, "table already exists")
    self.tables.append(Table(name, columns))
    return self.tables[-1]
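# A hypothetical usage sketch of create_table. The stand-in classes below are
# minimal stubs so the sketch runs on its own; the real Table, TableColumnData,
# and TableCreationError live elsewhere in the project.
from typing import List, Optional

class TableColumnData:
    def __init__(self, name: str):
        self.name = name

class Table:
    def __init__(self, name: str, columns=None):
        self.name = name
        self.columns = columns or []

class TableCreationError(Exception):
    def __init__(self, name, reason):
        super().__init__(f"{name}: {reason}")

class Database:
    def __init__(self):
        self.tables: List[Table] = []

    def create_table(self, name: str,
                     columns: Optional[List[TableColumnData]] = None) -> Table:
        if name in [t.name for t in self.tables]:
            raise TableCreationError(name, "table already exists")
        self.tables.append(Table(name, columns))
        return self.tables[-1]

db = Database()
db.create_table("users", [TableColumnData("id"), TableColumnData("email")])
try:
    db.create_table("users")  # duplicate name raises
except TableCreationError as err:
    print(err)  # users: table already exists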
def main():
    start_time = datetime.datetime.now()
    print("Scraping start time: {0}".format(start_time))
    codes = scraping.codes.codes()
    table = Table()
    for i, name, code in codes.itertuples(name=None):
        table_last_date = table.get_last_date(code)
        page_num = 20
        if table_last_date is not None:
            # The division by 10 suggests each scraped page spans about
            # ten days of rows, so fetch just enough pages to reach back
            # to the last stored date.
            page_num = math.ceil(
                (datetime.datetime.today() - table_last_date).days / 10)
            if page_num == 0:
                continue
        data = sc.getData(code, page_num=page_num)
        data = sc.preprocess(data, code, name)
        if table_last_date is not None:
            # If table_last_date is None, everything is inserted wholesale.
            data = data.drop(
                data[data['date'].apply(lambda x: x <= table_last_date)].index)
        data.to_sql(name=table.name, con=table.con,
                    if_exists='append', index=False)
        table.commit()
        progressBar(name, codes[codes['corp_code'] == code].index[0],
                    len(codes), bar_length=50)
    print("All scraping finished!")
    table.close()
    end_time = datetime.datetime.now()
    print("End time: {0}".format(end_time))
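# A hypothetical worked example of the page-count arithmetic above, assuming
# each scraped page covers roughly ten days (as the division by 10 implies):
import math
import datetime

last = datetime.datetime.today() - datetime.timedelta(days=25)  # last row is 25 days old
page_num = math.ceil((datetime.datetime.today() - last).days / 10)
print(page_num)  # 3 -> three pages are enough to cover the 25-day gap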
def run():
    infile = 'data.csv'
    result_file = 'results.csv'
    alter_table_file = 'alter_tables.sql'
    df = pd.read_csv(infile)
    df = df.groupby('X')
    db = Db(TEMP_DB)
    total_errors = 0
    alter_statements = []
    with open('script.sql', 'w') as file, \
            open(result_file, 'w') as results, \
            open(alter_table_file, 'w') as alter_file:
        for table_name, data in df:
            t = Table(db, table_name, data, sql_log=file,
                      results_log=results, test_mode=False)
            t.prepare_temp_table()
            errors = t.test_temp_table()
            if errors == 0:
                statement = t.get_alter_table_for_original_table()
                alter_statements.append(statement)
                alter_file.write(statement)
            total_errors += errors
    if total_errors == 0:
        print(f'All tests passed: alter table file generated in {alter_table_file}')
        do_it = input(f"Do you want to apply the changes to the original database ({ORIGINAL_DB}) [yN]:")
        if do_it.lower() == 'y':
            print('Applying modifications to the original database')
            db = Db(ORIGINAL_DB)
            for statement in alter_statements:
                db.execute(statement)
            print('All done')
    else:
        print('Not all tests passed')
        sys.exit(1)
stdout.flush()  # tail of the preceding progress-bar helper (not shown)


def get_row_for_rsi(code):
    values = ['date', 'code', 'name', 'close', 'volume']
    df = pd.DataFrame(table_origin.get_rows(code, values), columns=values)
    # axis=1 is required for a row-wise apply (the original omitted it).
    df['volume-price'] = df.apply(lambda x: x['close'] * x['volume'], axis=1)
    return df, code


start_time = datetime.datetime.now()
print("RSI calculation start time: {0}".format(start_time))
codes = scraping.codes.codes()
result = pd.DataFrame(columns=['date', 'code', 'name', 'rsi'])
table_origin = Table()
table_secondary = TableSecondary("daily_rsi")
with Pool(processes=4) as pool:
    # Pool raises an error if job_rsi is not defined at main/module level.
    for df, code in pool.map(get_row_for_rsi, codes['corp_code'].tolist()):
        df = rsi(df)
        table_last_date = table_secondary.get_last_date(code)
        # Save to the table.
        if table_last_date is not None:
            # If table_last_date is None, everything is inserted wholesale.
            df = df.drop(df[df['date'].apply(lambda x: x <= table_last_date)].index)
        df[['date', 'code', 'name', 'rsi']].to_sql(
            name=table_secondary.name, con=table_secondary.con,
            if_exists='append', index=False)
        # Save to result.
        if df.iloc[-1]['rsi'] <= 30:
            # Truncated in the source; presumably the latest (oversold) row
            # is appended to `result` here.
            pass
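# The rsi() helper is not shown above. As a reference, here is a minimal
# sketch of a standard 14-period RSI over the close column, using simple
# rolling means rather than Wilder smoothing; purely illustrative, not the
# project's actual implementation. The `<= 30` check above is the
# conventional oversold threshold.
import pandas as pd

def rsi_sketch(df: pd.DataFrame, period: int = 14) -> pd.DataFrame:
    # Classic RSI: 100 - 100 / (1 + RS), where RS is the ratio of average
    # gains to average losses over the lookback window.
    delta = df['close'].diff()
    gain = delta.clip(lower=0).rolling(period).mean()
    loss = (-delta.clip(upper=0)).rolling(period).mean()
    df['rsi'] = 100 - 100 / (1 + gain / loss)
    return df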
from db.table import Table
from db.files import Files
from db.file_changes import FileChange
from db.log import Log
from db.patient import Patient
from db.study import Study
from db.series import Series
from db.replica import Replica
from db.replica_files import ReplicaFiles
from db.share_files import SharedFiles
from db.users import Users

Table.register(Log)
Table.register(Patient)
Table.register(Replica)
Table.register(Study)
Table.register(Series)
Table.register(Files)
Table.register(Users)
Table.register(FileChange)
Table.register(ReplicaFiles)
Table.register(SharedFiles)