class InstagramGeoFinder:
    """Fetch recent posts for an Instagram location and render them as HTML.

    Typical use: call :meth:`find_geo` to download the location sections,
    then pass its result to :meth:`save_to_file`.
    """

    # Directory (relative to the current working directory) for reports.
    _OUTPUT_DIR = "result_html"

    _HTML_HEAD = """
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>Kiski Finder</title>
</head>
<body>
<table border=1>
<tr>
<th>Kiska</th>
<th>FullName</th>
<th>Time and Description</th>
<th>User</th>
<th>Photo</th>
</tr>
<indent>
"""

    _HTML_TAIL = """
</indent>
</table>
</body>
</html>
"""

    def __init__(self, username="******", password="******",
                 settings_file="instagram_client"):
        """Create the API client, reusing cached settings when available.

        If ``settings_file`` does not exist, an unauthenticated client is
        created. Otherwise the cached settings are loaded and reused. When
        the client reports an expired cookie or a required login, a fresh
        login is performed with the cached device id, and
        ``onlogin_callback`` is invoked with the settings file path.

        :param username: Instagram login name
        :param password: Instagram login password
        :param settings_file: path of the cached client settings (JSON)
        """
        self.settings_file = settings_file
        device_id = None
        try:
            if not os.path.isfile(self.settings_file):
                self.api = Client(auto_patch=True, authenticate=False)
            else:
                with open(self.settings_file) as file_data:
                    cached_settings = json.load(
                        file_data, object_hook=from_json)
                print('Reusing settings: {0!s}'.format(self.settings_file))
                device_id = cached_settings.get('device_id')
                # Reuse auth settings.
                self.api = Client(
                    username=username,
                    password=password,
                    settings=cached_settings)
        except (ClientCookieExpiredError, ClientLoginRequiredError):
            # Login expired: log in again, but use default ua, keys and such.
            self.api = Client(
                username=username,
                password=password,
                device_id=device_id,
                on_login=lambda x: onlogin_callback(x, self.settings_file))

    def find_geo(self, location_id, count):
        """Collect "recent" sections for a location, page by page.

        Also stores the location's name in ``self.file_name``; that name is
        what :meth:`get_filename` later uses.

        Paging stops as soon as at least ``count`` sections were collected
        or the response carries no ``next_max_id``. Whole pages are
        appended, so the result may hold more than ``count`` entries.

        :param location_id: Instagram location id
        :param count: number of sections after which paging stops
        :return: list of section dicts as returned by the API
        """
        rank_token = Client.generate_uuid()
        location_info = self.api.location_info(location_id)
        self.file_name = location_info["location"]["name"]

        result_photo = []
        next_max_id = None
        while True:
            paging = {"max_id": next_max_id} if next_max_id else {}
            result_location = self.api.location_section(
                location_id, rank_token, tab='recent', **paging)
            result_photo.extend(result_location["sections"])
            if len(result_photo) >= count:
                break
            # A missing or empty cursor means this was the last page.
            next_max_id = result_location.get("next_max_id")
            if not next_max_id:
                break

        logging.info(len(result_photo))
        return result_photo

    def get_filename(self):
        """Return the report file name derived from ``self.file_name``."""
        return "instagram_%s.html" % self.file_name

    @staticmethod
    def _caption_cell(media):
        """Return the "time and description" cell content for one media.

        Yields ``"-"`` when the caption (or its timestamp) is ``None``.
        """
        from html import escape

        try:
            caption = media["caption"]
            created_at = int(caption["created_at"])
            text = caption["text"]
        except TypeError:
            return "-"
        stamp = datetime.datetime.fromtimestamp(
            created_at, tz=datetime.timezone.utc
        ).strftime('%Y-%m-%d %H:%M:%S')
        return "{0} <br> {1}".format(stamp, escape(str(text)))

    @staticmethod
    def _image_url(media):
        """Return the first image candidate URL of a media.

        Falls back to the first carousel entry when the media itself has no
        ``image_versions2`` key.
        """
        try:
            return media["image_versions2"]["candidates"][0]["url"]
        except KeyError:
            first = media["carousel_media"][0]
            return first["image_versions2"]["candidates"][0]["url"]

    @classmethod
    def _render_row(cls, media):
        """Render one media dict as an HTML ``<tr>`` with escaped values."""
        from html import escape

        user = media["user"]
        username = escape(str(user["username"]))
        image = '<img src="{0}" width="255" height="255" alt="lorem">'
        cells = [
            '<td><a href="http://instagram.com/{0}/">{0}</a></td>'.format(
                username),
            "<td>{0}</td>".format(escape(str(user["full_name"]))),
            '<td width="200" style="word-break: break-all;">{0}</td>'.format(
                cls._caption_cell(media)),
            "<td>" + image.format(
                escape(str(user["profile_pic_url"]))) + "</td>",
            "<td>" + image.format(
                escape(str(cls._image_url(media)))) + "</td>",
        ]
        return "<tr>" + "".join(cells) + "</tr>"

    def save_to_file(self, array_photo):
        """Write the collected media to an HTML report.

        All values taken from the API response are HTML-escaped, since
        names and captions are user-controlled text.

        :param array_photo: sections as returned by :meth:`find_geo`; each
            must provide ``["layout_content"]["medias"]``
        :return: relative path of the written file,
            ``"result_html/<file name>"``
        """
        logging.info("Save to html")
        rows = [
            self._render_row(item["media"])
            for layout in array_photo
            for item in layout["layout_content"]["medias"]
        ]

        os.makedirs(self._OUTPUT_DIR, exist_ok=True)
        filename = self.get_filename()
        target = os.path.join(self._OUTPUT_DIR, filename)
        # Save to HTML.
        with open(target, 'w', encoding="utf-8") as file:
            file.write(self._HTML_HEAD + "".join(rows) + self._HTML_TAIL)
        return "result_html/" + filename
class Scraper:
    """Class for extracting data from Instagram"""

    # Name used to recognise the error-log handler on the shared logger.
    _ERROR_HANDLER_NAME = "scraper-error-log"
    _HERE_AUTOSUGGEST_URL = \
        'https://places.cit.api.here.com/places/v1/autosuggest'
    # Seconds to wait for the Here API before giving up on one lookup.
    _HTTP_TIMEOUT = 10

    def __init__(self):
        """Initialises an empty scraper and its logger

        Attributes set here:

        * ``api`` -- API client, ``None`` until :meth:`login` is called
        * ``app_id`` / ``app_code`` -- Here credentials, ``None`` until
          :meth:`setHereApp` is called
        * ``stories_found`` -- list of story ids already written
        * ``users_found`` -- list of users found
        * ``locations_categories`` -- dict that maps a location id to its
          category
        """
        self.api = None
        self.app_id = None
        self.app_code = None
        self.stories_found = []
        self.users_found = []
        self.locations_categories = {}
        self.setLogging()

    def login(self, username: str, password: str):
        """Logs to Instagram

        :param username: Instagram login name
        :param password: Instagram login password
        """
        self.api = Client(username, password)

    def setHereApp(self, app_id: str, app_code: str):
        """Sets Here app id and code

        :param app_id: here app id
        :param app_code: here app code
        """
        self.app_id = app_id
        self.app_code = app_code

    def findStories(self, source_id: int, filename: str):
        """Searches stories made in a location and saves them in a file

        One ``|``-delimited row ``userid|timestamp|location_id|name`` is
        appended per story location. Stories whose id is already in
        ``stories_found`` are skipped. Any error is logged, not raised.

        :param source_id: instagram source location id
        :param filename: file where to save stories
        """
        try:
            results = self.api.location_stories(source_id)
            items = results['story']['items']
            # Nothing new: do not open (and thereby create) the file.
            if all(item['id'] in self.stories_found for item in items):
                return

            with open(filename, 'a', newline='', encoding='utf-8') as file:
                writer = csv.writer(file, delimiter='|',
                                    quoting=csv.QUOTE_MINIMAL)
                for item in items:
                    item_id = item['id']
                    if item_id in self.stories_found:
                        continue
                    self.stories_found.append(item_id)
                    userid = item['user']['pk']
                    # 86400 s = 24 h before expiry; presumably the time the
                    # story was posted -- TODO confirm.
                    timestamp = item['expiring_at'] - 86400
                    for story_location in item['story_locations']:
                        location = story_location['location']
                        writer.writerow([
                            userid, timestamp,
                            location['pk'], location['name']
                        ])
        except Exception:
            # Deliberate best effort: record the failure and carry on.
            self.logger.exception(
                "Could not collect stories for location %s", source_id)

    def _lookupCategory(self, location_id: str, location_name: str) -> str:
        """Asks the Here autosuggest API for the category of a location

        :param location_id: instagram location id
        :param location_name: location name used as the search query
        :return: category of the first result, ``''`` when there is none
        """
        info = self.api.location_info(location_id)
        lat = info['location']['lat']
        lng = info['location']['lng']
        # ``params`` percent-encodes the values, so names that contain
        # spaces, '&' or '#' no longer corrupt the query string.
        response = requests.get(
            self._HERE_AUTOSUGGEST_URL,
            params={
                'at': '{0},{1}'.format(lat, lng),
                'q': location_name,
                'app_id': self.app_id,
                'app_code': self.app_code,
            },
            timeout=self._HTTP_TIMEOUT)
        response.raise_for_status()
        results = response.json()["results"]
        if results:
            return results[0]['category']
        return ''

    def categorize(self, source_file: str, target_file: str):
        """Searches locations' categories

        Each source row is copied to the target with its category appended.
        Categories are cached in ``locations_categories``, so each location
        is looked up at most once. Rows that fail are logged and skipped.

        :param source_file: file with locations to find categories
        :param target_file: file where to write locations along their
            categories
        """
        with open(source_file, 'r', newline='', encoding='utf-8') as inp, \
                open(target_file, 'a', newline='', encoding='utf-8') as out:
            reader = csv.reader(inp, delimiter='|')
            writer = csv.writer(out, delimiter='|')
            for row in reader:
                if not row:
                    # csv.reader yields [] for blank lines.
                    continue
                if len(row) < 4:
                    self.logger.error("Skipping malformed row: %r", row)
                    continue
                userid, timestamp, location_id, location_name = row[:4]
                try:
                    if location_id not in self.locations_categories:
                        self.locations_categories[location_id] = \
                            self._lookupCategory(location_id, location_name)
                    writer.writerow([
                        userid, timestamp, location_id, location_name,
                        self.locations_categories[location_id]
                    ])
                except Exception:
                    self.logger.exception(
                        "Could not categorize location %s", location_id)
                    continue

    def setLogging(self):
        """Sets logger handler and formatters

        The logger is shared by every instance in this module, so the
        error-log handler is attached only once; attaching one per instance
        would write every message several times.
        """
        self.logger = logging.getLogger(__name__)
        self.logger.setLevel(logging.INFO)
        if any(handler.get_name() == self._ERROR_HANDLER_NAME
               for handler in self.logger.handlers):
            return

        # backupCount must be at least 1: with 0 the handler never rolls
        # over and maxBytes is ignored.
        errorLogHandler = handlers.RotatingFileHandler('error.log',
                                                       maxBytes=5000,
                                                       backupCount=1)
        errorLogHandler.set_name(self._ERROR_HANDLER_NAME)
        errorLogHandler.setLevel(logging.ERROR)
        formatter = logging.Formatter(
            '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
        errorLogHandler.setFormatter(formatter)
        self.logger.addHandler(errorLogHandler)