Example #1
0
def test():
    """Dry run: build a temp table for each X-group of data.csv,
    logging the generated SQL to script.sql (test_mode=True)."""
    source = 'data.csv'
    grouped = pd.read_csv(source).groupby('X')
    db = Db(TEMP_DB)
    with open('script.sql', 'w') as sql_log:
        for table_name, data in grouped:
            table = Table(db, table_name, data, sql_log, test_mode=True)
            table.prepare_temp_table()
class Control:
    """Hash-table controller.

    Reads registers from a file into a linked list of pages, hashes each
    tuple's text into a bucket, and tracks collision/overflow statistics.

    Note: the original declared ``buckets``/``table``/``info``/``page_head``
    as class attributes, so every instance shared one mutable state (a
    classic Python pitfall); they are now created per instance in
    ``__init__``. Instance-level behavior is unchanged.
    """

    def __init__(self):
        self.buckets = {}   # hash value -> Bucket
        self.table = Table()
        self.info = {"collision": 0, "overflow": 0}
        self.page_head = Page()   # head of the linked list of pages

    def hash_function(self, text):
        """Hash *text*: for each character, take the first fractional digit
        of sqrt(ord(char)), concatenate the digits, and parse as an int.

        Note: raises ValueError for an empty string (``int('')``) — kept
        from the original; callers are expected to pass non-empty text.
        """
        digits = "".join(
            str(int(math.modf(math.sqrt(ord(ch)))[0] * 10)) for ch in text
        )
        return int(digits)

    def readfile(self, file):
        """Load registers from *file* (one per line) into pages and the
        table's tuple list, then build the hash buckets."""
        page = self.page_head
        tuples = []
        with open(file, 'r') as reader:
            for reg in reader:
                reg = reg.replace("\n", "")
                tuples.append(Tuple(reg))
                # When the current page is full, advance to the next one.
                # NOTE(review): assumes enough pages are pre-linked for the
                # input size — page.next being None would raise here.
                if not page.set_register(reg):
                    page = page.next
        self.table.tuples = tuples
        self.dohash()

    def dohash(self):
        """Walk every page and hash each tuple into a bucket, counting a
        collision whenever a bucket already exists for the hash and an
        overflow whenever the target bucket reports overflow."""
        page = self.page_head
        while page is not None:
            for _tuple in page.register:
                _hash = self.hash_function(_tuple.text)
                if _hash not in self.buckets:
                    bucket = Bucket()
                    bucket.add_ref(page)
                    self.buckets[_hash] = bucket
                else:
                    bucket = self.buckets[_hash]
                    self.info["collision"] += 1
                    bucket.add_ref(page)
                if bucket.overflow:
                    self.info["overflow"] += 1
            page = page.next

    def get_info(self):
        """Return a snapshot copy of the collision/overflow counters."""
        return {
            "collision": self.info["collision"],
            "overflow": self.info["overflow"],
        }

    def search(self, text):
        """Linear-search the bucket chain for *text*.

        Returns (cost, text) where cost is the number of registers
        inspected, or None (implicitly) when not found.  Raises KeyError
        if no bucket exists for the hash — kept from the original.
        """
        cost = 0
        index = self.hash_function(text)
        bucket = self.buckets[index]
        while bucket is not None:
            for page in bucket.pages:
                for reg in page.register:
                    cost += 1
                    if reg.text == text:
                        return cost, reg.text
            bucket = bucket.next_bucket
Example #3
0
    def create_table(self,
                     name: str,
                     columns: "List[TableColumnData] | None" = None) -> Table:
        """Create a new table named *name* with *columns* and return it.

        Raises:
            TableCreationError: if a table with this name already exists.
        """
        # any() with a generator avoids materializing the full name list
        # just to do a membership test.
        if any(t.name == name for t in self.tables):
            raise TableCreationError(name, "table already exists")

        # Keep a direct reference instead of appending then re-reading
        # self.tables[-1].
        table = Table(name, columns)
        self.tables.append(table)
        return table
Example #4
0
def main():
    """Scrape daily price data for every corp code and append only the
    new rows (after each code's last stored date) to the DB table."""
    start_time = datetime.datetime.now()
    print("Scraping 시작시간: {0}".format(start_time))

    codes = scraping.codes.codes()

    table = Table()
    total = len(codes)  # hoisted out of the loop

    for i, name, code in codes.itertuples(name=None):
        table_last_date = table.get_last_date(code)
        # Fresh code: fetch a default 20 pages.  Otherwise fetch only
        # enough pages to cover the days since the last stored date
        # (presumably ~10 rows per page — TODO confirm against sc.getData).
        page_num = 20
        if table_last_date is not None:
            page_num = math.ceil(
                (datetime.datetime.today() - table_last_date).days / 10)
        if page_num == 0:
            continue  # already up to date
        data = sc.getData(code, page_num=page_num)
        data = sc.preprocess(data, code, name)
        if table_last_date is not None:  # without a last date, insert everything
            # Drop rows at or before the last stored date to avoid duplicates.
            data = data.drop(
                data[data['date'].apply(lambda x: x <= table_last_date)].index)
        data.to_sql(name=table.name,
                    con=table.con,
                    if_exists='append',
                    index=False)
        table.commit()
        # Use the row index already yielded by itertuples instead of the
        # original O(n) re-scan codes[codes['corp_code'] == code].index[0]
        # (equivalent assuming corp_code values are unique — they index
        # distinct companies).
        progressBar(name, i, total, bar_length=50)

    print("모든 Scraping 완료!")
    table.close()

    end_time = datetime.datetime.now()
    print("종료시간: {0}".format(end_time))
Example #5
0
def run():
    """Validate every X-group of data.csv against a temp database; when all
    groups pass, write the ALTER TABLE statements to alter_tables.sql and
    optionally apply them to the original database.

    Exits with status 1 if any group has errors.
    """
    infile = 'data.csv'
    result_file = 'results.csv'
    alter_table_file = 'alter_tables.sql'

    df = pd.read_csv(infile).groupby('X')
    db = Db(TEMP_DB)
    total_errors = 0  # plain int accumulator (original used float(0) needlessly)
    alter_statements = []

    # One with-statement instead of three nested ones.
    with open('script.sql', 'w') as file, \
            open(result_file, 'w') as results, \
            open(alter_table_file, 'w') as alter_file:
        for table_name, data in df:
            t = Table(db, table_name, data, sql_log=file, results_log=results, test_mode=False)
            t.prepare_temp_table()
            errors = t.test_temp_table()
            if errors == 0:
                # Only collect/write ALTER statements for clean tables.
                statement = t.get_alter_table_for_original_table()
                alter_statements.append(statement)
                alter_file.write(statement)
            total_errors += errors
    if total_errors == 0:
        print(f'All tests passed: alter table file generated in {alter_table_file}')
        do_it = input(f"Do you want to apply the changes to the original database ({ORIGINAL_DB}) [yN]:")
        if do_it.lower() == 'y':
            print('Applying modifications to the original database')
            db = Db(ORIGINAL_DB)
            for statement in alter_statements:
                db.execute(statement)
            print('All done')
    else:
        # was an f-string with no placeholders
        print('Not all tests passed')
        sys.exit(1)
Example #6
0
    stdout.flush()


def get_row_for_rsi(code):
    """Fetch OHLCV rows for *code* and add a 'volume-price' column.

    Returns (DataFrame, code) so the code survives the Pool.map round trip.
    """
    values = ['date', 'code', 'name', 'close', 'volume']
    df = pd.DataFrame(table_origin.get_rows(code, values), columns=values)
    # BUG FIX: the original df.apply(lambda x: x['close'] * x['volume'])
    # used the default axis=0, so the lambda received whole columns (not
    # rows) and the row-key lookups could never work.  The vectorized
    # column multiply is both correct and faster.
    df['volume-price'] = df['close'] * df['volume']
    return df, code


start_time = datetime.datetime.now()
print("RSI 계산 시작시간: {0}".format(start_time))
codes = scraping.codes.codes()
result = pd.DataFrame(columns=['date', 'code', 'name', 'rsi'])

table_origin = Table()
table_secondary = TableSecondary("daily_rsi")

with Pool(processes=4) as pool: # job_rsi가 main이 아니면 pool에서 오류남
    for df, code in pool.map(get_row_for_rsi, codes['corp_code'].tolist()):
        df = rsi(df)
        table_last_date = table_secondary.get_last_date(code)

        # 테이블에 저장
        if table_last_date is not None:  # table_last_date가 없으면 통째로 다 넣게 됨
            df = df.drop(df[df['date'].apply(lambda x: x <= table_last_date)].index)
        df[['date', 'code', 'name', 'rsi']].to_sql(
            name=table_secondary.name, con=table_secondary.con, if_exists='append', index=False)

        # result에 저장
        if df.iloc[-1]['rsi'] <= 30):
Example #7
0
from db.table import Table
from db.files import Files
from db.file_changes import FileChange
from db.log import Log
from db.patient import Patient
from db.study import Study
from db.series import Series
from db.replica import Replica
from db.replica_files import ReplicaFiles
from db.share_files import SharedFiles
from db.users import Users

# Register every concrete table type with the Table base class.
# Registration order is preserved from the original one-call-per-line form.
for _table_cls in (
    Log,
    Patient,
    Replica,
    Study,
    Series,
    Files,
    Users,
    FileChange,
    ReplicaFiles,
    SharedFiles,
):
    Table.register(_table_cls)