def _insert_person(self, person): """ insert the person object in the SQLite database Args: person (app.PersonObj): object data downloaded from FamilySearch """ conn: Connection = self._get_connection() cursor: Cursor = conn.cursor() sql = """ INSERT INTO persons (personid, timestamp, referenceid, status_list, status, generation, fsperson, name, gender, born, lifespan, fatherids, motherids, relationships, last_modified) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) """ try: cursor.execute( sql, (person.personid, person.timestamp, person.referenceid, person.status_list, person.status, person.generation, person.fsperson, person.name, person.gender, person.born, person.lifespan, person.fatherids, person.motherids, person.relationships, person.last_modified)) conn.commit() conn.close() # except sqlite3.Error as e: app.write_log('error', "SQLite INSERT person error occurred: " + e.args[0])
def __init__(self): """ initialize the SQLite database""" conn: Connection = self._get_connection() cursor: Cursor = conn.cursor() try: cursor.execute(""" CREATE TABLE IF NOT EXISTS persons ( id INTEGER PRIMARY KEY, personid TEXT NOT NULL, timestamp INTEGER NOT NULL, referenceid TEXT NOT NULL, status_list TEXT NOT NULL, status TEXT NOT NULL, generation INTEGER NOT NULL, fsperson TEXT, name TEXT, gender TEXT, born TEXT, lifespan TEXT, fatherids TEXT, motherids TEXT, relationships TEXT, last_modified INTEGER); """) conn.commit() cursor.execute(""" CREATE TABLE IF NOT EXISTS sessions ( id INTEGER PRIMARY KEY, timestamp INTEGER NOT NULL, person_count INTEGER NOT NULL, status TEXT, change_log TEXT); """) conn.commit() cursor.execute(""" CREATE VIEW IF NOT EXISTS ancestors AS SELECT -- list of ancestors in the persisted data generation, name, gender, lifespan, personid, fatherids, motherids, referenceid, timestamp FROM persons WHERE referenceid = ( SELECT referenceid FROM ( -- latest referencid in the persisted data SELECT referenceid, max(timestamp) FROM persons WHERE personid = referenceid GROUP BY referenceid ) ) AND id in (SELECT id FROM (SELECT id, max(timestamp) FROM persons GROUP BY generation, referenceid, gender)) ORDER BY generation ASC, gender DESC """) conn.commit() conn.close() # except sqlite3.Error as e: app.write_log('error', "SQLite CREATE TABLE error occurred:" + e.args[0])
def get_persons(self, referenceid): """ Get the most recent persisted persons with referenceid from the SQLite database Args: referenceid (str): the ID of the reference person Returns: (list) with the most recent matching records (dict) """ conn: Connection = self._get_connection() conn.row_factory = sqlite3.Row cursor: Cursor = conn.cursor() sql = 'SELECT * FROM persons WHERE referenceid = ? ORDER BY timestamp DESC' try: cursor.execute(sql, [referenceid]) rows = [dict(row) for row in cursor.fetchall()] # convert sqlite3.Row return rows # except sqlite3.Error as e: app.write_log('error', "SQLite INSERT person error occurred:" + e.args[0]) return None
def persist_session(self, timestamp, referenceid, person_count, change_log): """ persist session data Args: timestamp (int): timestamp for all data in the current session referenceid (str): id of the reference person person_count (int): number of person objects processed change_log (list): list with all changes """ conn: Connection = self._get_connection() cursor: Cursor = conn.cursor() sql = """ INSERT INTO sessions (timestamp, person_count, status, change_log) VALUES (?, ?, ?, ?) """ try: dt_object = datetime.fromtimestamp(timestamp) status = 'Timestamp: ' + \ dt_object.strftime("%Y-%m-%d %H:%M:%S") + '. Ancestors for ID: ' + referenceid if change_log: status += '. Found ' + \ str(len(change_log)) + ' changes in ancestors.' else: status += '. Nothing has changed.' app.write_log('info', status) for change in change_log: app.write_log('info', change) cursor.execute( sql, (timestamp, person_count, status, json.dumps(change_log))) conn.commit() conn.close() # except sqlite3.Error as e: app.write_log('error', "SQLite INSERT session error occurred:" + e.args[0])
def check_integrity(self): """ check the SQLite database integrity Returns: (bool): True if OK, False if not. """ conn: Connection = self._get_connection() conn.row_factory = sqlite3.Row cursor: Cursor = conn.cursor() try: cursor.execute('pragma integrity_check;') rows = [dict(row) for row in cursor.fetchall()] # convert sqlite3.Row status = rows[0]['integrity_check'] if status == 'ok': return True return False # except sqlite3.Error as e: app.write_log('error', "SQLite INSERT person error occurred:" + e.args[0]) return False
def login(self): """ retrieve FamilySearch session ID (https://familysearch.org/developers/docs/guides/oauth2) """ while True: try: url = "https://www.familysearch.org/auth/familysearch/login" app.write_log('debug', "Downloading: " + url) r = requests.get(url, params={"ldsauth": False}, allow_redirects=False) url = r.headers["Location"] app.write_log('debug', "Downloading: " + url) r = requests.get(url, allow_redirects=False) idx = r.text.index('name="params" value="') span = r.text[idx + 21:].index('"') params = r.text[idx + 21:idx + 21 + span] url = "https://ident.familysearch.org/cis-web/oauth2/v3/authorization" app.write_log('debug', "Downloading: " + url) r = requests.post( url, data={ "params": params, "userName": self.username, "password": self.password }, allow_redirects=False, ) if "The username or password was incorrect" in r.text: app.write_log('error', "The username or password was incorrect") return False if "Invalid Oauth2 Request" in r.text: app.write_log('error', "Invalid Oauth2 Request") time.sleep(self.timeout) continue url = r.headers["Location"] app.write_log('debug', "Downloading: " + url) r = requests.get(url, allow_redirects=False) self.fssessionid = r.cookies["fssessionid"] except requests.exceptions.ReadTimeout: app.write_log('debug', "Read timed out") continue except requests.exceptions.ConnectionError: app.write_log('debug', "Connection aborted") time.sleep(self.timeout) continue except requests.exceptions.HTTPError: app.write_log('debug', "HTTPError") time.sleep(self.timeout) continue except KeyError: app.write_log('debug', "KeyError") time.sleep(self.timeout) continue except ValueError: app.write_log('debug', "ValueError") time.sleep(self.timeout) continue app.write_log('debug', "FamilySearch session id: " + self.fssessionid) self.set_current() return True
def get_url(self, url, fsaccept="application/json"): """ retrieve JSON structure from a FamilySearch URL """ self.counter += 1 loop_counter = 0 while loop_counter <= 3: loop_counter += 1 try: app.write_log('debug', "Downloading: " + url) r = requests.get( "https://familysearch.org" + url, headers={"Accept": fsaccept}, cookies={"fssessionid": self.fssessionid}, timeout=self.timeout, ) except requests.exceptions.ReadTimeout: app.write_log('debug', "Read timed out") continue except requests.exceptions.ConnectionError: app.write_log('debug', "Connection aborted") time.sleep(self.timeout) continue app.write_log('debug', "Status code: %s" % r.status_code) self.status_code = r.status_code if r.status_code == 204: # The request was successful but nothing was available to # return. return None if r.status_code in {404, 405, 406, 410, 429, 500, 503, 504}: # 404: A resource was requested that does not exist. # 405: The request was not understood by the networking and routing infrastructure. # 406: An invalid content type is being used in the Accept header. # 410: The resource you are requesting has been deleted. # 429: Too many requests, the user has used too much processing time recently. # 500: Internal Server Error. # 503: Service Unavailable. # 504: Gateway Timeout. app.write_log('debug', "WARNING: code " + r.status_code + ", " + url) return None if r.status_code == 401: # The user is not properly authenticated. self.login() continue try: r.raise_for_status() except requests.exceptions.HTTPError: app.write_log('debug', "HTTPError") if r.status_code == 403: # A resource was requested that the user does not have # permission to access. app.write_log( 'debug', "WARNING: code " + r.status_code + ", " + url) return None time.sleep(self.timeout) continue try: return r.json() except Exception as e: # 420: Methode failure self.status_code = 420 app.write_log( 'error', "WARNING: corrupted file from %s, error: %s" % (url, e)) return None return None
def persist_person(self, person): """ persist person to database conditionally Args: person (app.PersonObj): object data downloaded from FamilySearch Result: (list): list with changes in person object """ if person.status == 'undefined': chgs = [] rows = self._get_person(person.personid, person.referenceid) if rows: # compare downloaded person data to the persisted person data row = rows[0] if person.status_list != row['status_list']: chgs.append('Status of HTTP codes has changed to ' + str(person.status_list) + '.') if person.generation != row['generation']: chgs.append('The generation number has changed to ' + str(person.generation) + '.') if person.name != row['name']: chgs.append('The name has changed from ' + str(row['name']) + ' to ' + str(person.name) + '.') if person.gender != row['gender']: chgs.append('The gender has changed to ' + str(person.gender) + '.') if person.born != row['born']: chgs.append('The birth date has changed from ' + str(row['born']) + ' to ' + str(person.born) + '.') if person.lifespan != row['lifespan']: chgs.append('The lifespan has changed from ' + str(row['lifespan']) + ' to ' + str(person.lifespan) + '.') if (person.fatherids) != row['fatherids']: chgs.append('The fatherids have changed from ' + str(row['fatherids']) + ' to ' + str(person.fatherids) + '.') if person.motherids != row['motherids']: chgs.append('The motherids have changed from ' + str(row['motherids']) + ' to ' + str(person.motherids) + '.') if person.relationships != row['relationships']: chgs.append( 'Change in relationships: father, mother, spouse, or children.' ) if person.last_modified != row['last_modified']: chgs.append('Change(s) in the persons change history.') if chgs: person.status = 'modified' chgs.insert( 0, 'Changes detected in object: ' + str(person.personid)) self._insert_person(person) # persist modified object return chgs elif person.status == 'created': self._insert_person(person) # persist new object in database return [ 'Created person object (' + person.personid + ') in database.' ] elif person.status == 'deleted': rows = self._get_person(person.personid, person.referenceid) if rows: # copy object attributes from database row = rows[0] person.status_list = row['status_list'] person.generation = row['generation'] person.name = row['name'] person.gender = row['gender'] person.born = row['born'] person.lifespan = row['lifespan'] person.fatherids = row['fatherids'] person.motherids = row['motherids'] person.relationships = row['relationships'] person.last_modified = row['last_modified'] self._insert_person(person) # persist deleted object return [ 'Added copy of deleted person object (' + person.personid + ') in database.' ] else: app.write_log('error', 'Illegal state in person object: ' + person.status) return []