/
models.py
150 lines (119 loc) · 4.31 KB
/
models.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
#!/usr/bin/env python3
import datetime
import os
import sys
# from peewee import BigIntegerField, IntegerField, SQL
from peewee import CharField, TextField, BooleanField
from peewee import SmallIntegerField
from peewee import DateTimeField, TimeField
from peewee import CompositeKey
from peewee import ForeignKeyField
from peewee import Model
from peewee import PrimaryKeyField
from peewee import fn
from settings import SETTINGS
from settings import DB as POSTGRES_DB
from utilities import logger
from xlrd import open_workbook
class DateTimeWithTimeZoneField(DateTimeField):
'''Time stamp field with time zone support for PostgreSQL'''
db_field = 'TIMESTAMP WITH TIME ZONE'
class TimeWithTimeZoneField(TimeField):
db_field = 'TIME WITH TIME ZONE'
class BaseModel(Model):
class Meta:
database = SETTINGS['DB']
class Subscription(BaseModel):
id = PrimaryKeyField()
index_url = CharField(max_length=200, unique=True)
active = BooleanField(default=True)
created_utc = DateTimeField(default=datetime.datetime.utcnow)
class Article(BaseModel):
"""Article Table
"""
id = PrimaryKeyField()
article_url = CharField(max_length=200, unique=True)
status = SmallIntegerField(default=0)
title = CharField(max_length=100, null=True)
content = TextField(null=True)
created_utc = DateTimeField(default=datetime.datetime.utcnow)
modified_utc = DateTimeField(null=True)
class SubscriptionArticle(BaseModel):
subscription = ForeignKeyField(
Subscription, to_field='id', related_name='articles',
on_delete='CASCADE')
article = ForeignKeyField(
Article, to_field='id', related_name='subscriptions',
on_delete='CASCADE')
created_utc = DateTimeField(default=datetime.datetime.utcnow)
class Meta:
primary_key = CompositeKey('subscription', 'article')
def create_tables(tables, reset=False):
for table in tables:
if not table.table_exists():
table.create_table()
logger.info('Table %s created' % table.__name__)
elif reset:
table.drop_table(cascade=True)
logger.info('Existing table %s dropped' % table.__name__)
table.create_table()
logger.info('Table %s created' % table.__name__)
else:
logger.info('Table %s already exists' % table.__name__)
return
def feed_subscription_url_from_xml(fname):
if not os.path.exists(fname):
logger.warn('%s does not exist' % fname)
return
counter = 0
with open_workbook(fname) as wb:
s = wb.sheet_by_index(0)
for row in range(1, s.nrows):
Subscription.get_or_create(
index_url=s.cell(row, 1).value
)
counter += 1
logger.info('%s index_url(s) has been inserted' % counter)
return
def feed_article_url_from_xls(fname):
if not os.path.exists(fname):
logger.warn('%s does not exist' % fname)
return
with open_workbook(fname) as wb:
s = wb.sheet_by_index(1)
for row in range(1, s.nrows):
Article.get_or_create(
article_url=s.cell(row, 1).value
)
return
def feed_subscription_article_from_xls(fname):
if not os.path.exists(fname):
logger.warn('%s does not exist' % fname)
return
with open_workbook(fname) as wb:
s = wb.sheet_by_index(2)
for row in range(1, s.nrows):
SubscriptionArticle.get_or_create(
subscription=s.cell(row, 0).value,
article=s.cell(row, 1).value
)
return
if __name__ == '__main__':
print('Using PRODUCTION database? %s' % SETTINGS['PRODUCTION_FLAG'])
if len(sys.argv) > 1 and sys.argv[1] == '--nointeraction':
pass
else:
r = input(
'Deleting and initializing tables, press Enter to continue...\n')
if r:
print('Aborted')
sys.exit(1)
tables = [Subscription, Article, SubscriptionArticle]
create_tables(tables, reset=True)
# abs_path = os.path.abspath(__file__)
# cur_dir = os.path.dirname(abs_path)
# test_url = os.path.join(cur_dir, 'test/data/test_url.xlsx')
# feed_subscription_url_from_xml(test_url)
# feed_article_url_from_xls(test_url)
# feed_subscription_article_from_xls(test_url)
POSTGRES_DB.close()