コード例 #1
0
    def on_status(self, status):
        """Classify, store and follow up on a single streamed status.

        Retweets are dropped. Any other status is tagged with the first
        matching topic ("tobacco", "crime", "cricket", "afl", or "" when no
        topic file matches), scored by the sentiment classifier, parsed into
        a record and saved to the database. Afterwards the author's timeline
        is searched through ``searchById`` using ``sys.argv[1]`` as the
        credential key.

        Args:
            status: the incoming status object; ``text``, ``user.id`` and
                (optionally) ``retweeted_status`` are read from it.

        Returns:
            True once the timeline search has finished. None when the status
            is a retweet, when the parser yields no record, or when the
            timeline search raises (the exception is printed).
        """
        # setup
        db = database.DButils()
        cl = classifier.MyClassifier()
        parser = Parser()

        # Filter out retweets. A status without the attribute is simply not
        # a retweet, so no blanket exception handler is needed here.
        if getattr(status, "retweeted_status", None):
            return None

        # Tag with the first matching topic. The topic files are checked in
        # priority order and each one is loaded only if the previous ones
        # did not match.
        topic = ""
        for topic_name, topic_file in (
                ("tobacco", smoke_file),
                ("crime", crime_file),
                ("cricket", cricket_file),
                ("afl", afl_file),
        ):
            if containTopic(loadTopicFiles(topic_file), status.text):
                topic = topic_name
                break

        # Perform sentiment analysis and keep the scores as strings.
        polarity, subjectivity, label = cl.get_sent_score(status.text)
        sent = {
            'polarity': str(polarity),
            # NOTE(review): the key is misspelled, but it is part of the
            # stored record, so it is kept as-is; renaming it needs a
            # coordinated change with whatever reads these records.
            'subjectiviy': str(subjectivity),
            'label': label
        }

        # parse tweets
        record = parser.status_parse(status, sent, topic)
        if record is None:
            return

        # save into couchdb
        db.save(db_name, record)

        # Search the timeline of this status' author; a failure here is
        # reported but must not propagate to the caller.
        try:
            searchById(sys.argv[1], status.user.id)
        except Exception as e:
            print(e)
            return
        print("finish searching on user: " + str(status.user.id))
        return True
コード例 #2
0
def searchById(admin, userid):
    """Fetch a user's recent timeline and store every usable tweet.

    Each fetched status (except the first one returned) is skipped when it
    is a retweet, when the parser fails or yields no record, or when the
    tagger labels it "Bad tweet". All remaining statuses are scored by the
    sentiment classifier, parsed and saved to the database.

    Args:
        admin: key into ``app_auth`` selecting the API credentials to use.
        userid: id of the user whose timeline is fetched.

    Returns:
        None in every case. If the timeline request raises
        ``tweepy.TweepError`` the function sleeps for 16 minutes and then
        returns without storing anything.
    """
    print("start searching " + str(userid) + " timeline")

    # set up
    db = database.DButils()
    cl = classifier.MyClassifier()
    parser = Parser()
    tagger = Tagger()
    auth = tweepy.OAuthHandler(app_auth[admin].ckey, app_auth[admin].csec)
    auth.set_access_token(app_auth[admin].atoken, app_auth[admin].asec)
    api = tweepy.API(auth,
                     wait_on_rate_limit=True,
                     wait_on_rate_limit_notify=True)

    if not api:
        print("Can't Authenticate api key")
        sys.exit(-1)

    try:
        query = api.user_timeline(user_id=userid, count=10, lang='en')
    except tweepy.TweepError:
        print("search API access time limited, searching sleeps n back soon ")
        time.sleep(16 * 60)
        print("crawler back to work")
        return

    # The first returned status is skipped -- presumably it is the tweet
    # that triggered this search and is already stored; TODO confirm.
    for status in query[1:]:
        # Filter out retweets. Only this status is skipped; previously a
        # single retweet aborted the whole timeline.
        if getattr(status, "retweeted_status", None):
            continue

        # tag the tweet with its topic
        topic = tagger.topic_tagger(status.text)

        # Perform sentiment analysis and keep the scores as strings.
        polarity, subjectivity, label = cl.get_sent_score(status.text)
        sent = {
            'polarity': str(polarity),
            # NOTE(review): misspelled key kept as-is because it is part of
            # the stored record.
            'subjectiviy': str(subjectivity),
            'label': label
        }

        # Parse the tweet; a parser failure means "no record", not a crash.
        try:
            record = parser.status_parse(status, sent, topic)
        except Exception:
            record = None

        # Compare the topic by value: identity comparison against a string
        # literal is not reliable.
        if record is None or topic == "Bad tweet":
            continue

        # save into couchdb
        db.save(db_name, record)
コード例 #3
0
    def aurin(self):

        # melbourne values
        total_females_melb = 0
        total_males_melb = 0
        total_persons_melb = 0
        north_america_melb = 0
        africa_melb = 0
        europe_melb = 0
        asia_melb = 0
        australia_melb = 0
        new_zealand_melb = 0
        born_elsewhere_melb = 0
        median_age_melb = 0
        median_household_income_melb = 0
        gambling_activities_melb = 0
        married_males_melb = 0
        married_females_melb = 0
        unmarried_males_melb = 0
        unmarried_females_melb = 0
        married_persons_melb = 0
        unmarried_persons_melb = 0

        # sydney values
        total_females_syd = 0
        total_males_syd = 0
        total_persons_syd = 0
        north_america_syd = 0
        africa_syd = 0
        europe_syd = 0
        asia_syd = 0
        australia_syd = 0
        new_zealand_syd = 0
        born_elsewhere_syd = 0
        median_age_syd = 0
        median_household_income_syd = 0
        gambling_activities_syd = 0
        married_males_syd = 0
        married_females_syd = 0
        unmarried_males_syd = 0
        unmarried_females_syd = 0
        married_persons_syd = 0
        unmarried_persons_syd = 0

        # brisbane values
        total_females_bris = 0
        total_males_bris = 0
        total_persons_bris = 0
        north_america_bris = 0
        africa_bris = 0
        europe_bris = 0
        asia_bris = 0
        australia_bris = 0
        new_zealand_bris = 0
        born_elsewhere_bris = 0
        median_age_bris = 0
        median_household_income_bris = 0
        gambling_activities_bris = 0
        married_males_bris = 0
        married_females_bris = 0
        unmarried_males_bris = 0
        unmarried_females_bris = 0
        married_persons_bris = 0
        unmarried_persons_bris = 0

        # hobart values
        total_females_hob = 0
        total_males_hob = 0
        total_persons_hob = 0
        north_america_hob = 0
        africa_hob = 0
        europe_hob = 0
        asia_hob = 0
        australia_hob = 0
        new_zealand_hob = 0
        born_elsewhere_hob = 0
        median_age_hob = 0
        median_household_income_hob = 0
        gambling_activities_hob = 0
        married_males_hob = 0
        married_females_hob = 0
        unmarried_males_hob = 0
        unmarried_females_hob = 0
        married_persons_hob = 0
        unmarried_persons_hob = 0

        # perth values
        total_females_per = 0
        total_males_per = 0
        total_persons_per = 0
        north_america_per = 0
        africa_per = 0
        europe_per = 0
        asia_per = 0
        australia_per = 0
        new_zealand_per = 0
        born_elsewhere_per = 0
        median_age_per = 0
        median_household_income_per = 0
        gambling_activities_per = 0
        married_males_per = 0
        married_females_per = 0
        unmarried_males_per = 0
        unmarried_females_per = 0
        married_persons_per = 0
        unmarried_persons_per = 0

        # canberra values
        total_females_can = 0
        total_males_can = 0
        total_persons_can = 0
        north_america_can = 0
        africa_can = 0
        europe_can = 0
        asia_can = 0
        australia_can = 0
        new_zealand_can = 0
        born_elsewhere_can = 0
        median_age_can = 0
        median_household_income_can = 0
        gambling_activities_can = 0
        married_males_can = 0
        married_females_can = 0
        unmarried_males_can = 0
        unmarried_females_can = 0
        married_persons_can = 0
        unmarried_persons_can = 0

        # darwin values
        total_females_dar = 0
        total_males_dar = 0
        total_persons_dar = 0
        north_america_dar = 0
        africa_dar = 0
        europe_dar = 0
        asia_dar = 0
        australia_dar = 0
        new_zealand_dar = 0
        born_elsewhere_dar = 0
        median_age_dar = 0
        median_household_income_dar = 0
        gambling_activities_dar = 0
        married_males_dar = 0
        married_females_dar = 0
        unmarried_males_dar = 0
        unmarried_females_dar = 0
        married_persons_dar = 0
        unmarried_persons_dar = 0

        # adelaide values
        total_females_ade = 0
        total_males_ade = 0
        total_persons_ade = 0
        north_america_ade = 0
        africa_ade = 0
        europe_ade = 0
        asia_ade = 0
        australia_ade = 0
        new_zealand_ade = 0
        born_elsewhere_ade = 0
        median_age_ade = 0
        median_household_income_ade = 0
        gambling_activities_ade = 0
        married_males_ade = 0
        married_females_ade = 0
        unmarried_males_ade = 0
        unmarried_females_ade = 0
        married_persons_ade = 0
        unmarried_persons_ade = 0

        try:

            filejson_marital_status = open(
                "../scripts/aurin/citydata_maritalstatus.json", "r")
            data_marital_status = json.load(filejson_marital_status)

            city_list_marital_status = data_marital_status["features"]

            for city in city_list_marital_status:
                for key, value in city["properties"].items():
                    if (key == "sa4_name16"
                            and str(value).__contains__("Melbourne")):
                        married_females_melb = city["properties"][
                            "f_tot_married"]
                        unmarried_females_melb = city["properties"][
                            "f_tot_never_married"]
                        married_males_melb = city["properties"][
                            "m_tot_married"]
                        unmarried_males_melb = city["properties"][
                            "m_tot_never_married"]
                        married_persons_melb = city["properties"][
                            "p_tot_married"]
                        unmarried_persons_melb = city["properties"][
                            "p_tot_never_married"]

                    elif (key == "sa4_name16"
                          and str(value).__contains__("Sydney")):
                        married_females_syd = city["properties"][
                            "f_tot_married"]
                        unmarried_females_syd = city["properties"][
                            "f_tot_never_married"]
                        married_males_syd = city["properties"]["m_tot_married"]
                        unmarried_males_syd = city["properties"][
                            "m_tot_never_married"]
                        married_persons_syd = city["properties"][
                            "p_tot_married"]
                        unmarried_persons_syd = city["properties"][
                            "p_tot_never_married"]

                    elif (key == "sa4_name16"
                          and str(value).__contains__("Hobart")):
                        married_females_hob = city["properties"][
                            "f_tot_married"]
                        unmarried_females_hob = city["properties"][
                            "f_tot_never_married"]
                        married_males_hob = city["properties"]["m_tot_married"]
                        unmarried_males_hob = city["properties"][
                            "m_tot_never_married"]
                        married_persons_hob = city["properties"][
                            "p_tot_married"]
                        unmarried_persons_hob = city["properties"][
                            "p_tot_never_married"]

                    elif (key == "sa4_name16"
                          and str(value).__contains__("Perth")):
                        married_females_per = city["properties"][
                            "f_tot_married"]
                        unmarried_females_per = city["properties"][
                            "f_tot_never_married"]
                        married_males_per = city["properties"]["m_tot_married"]
                        unmarried_males_per = city["properties"][
                            "m_tot_never_married"]
                        married_persons_per = city["properties"][
                            "p_tot_married"]
                        unmarried_persons_per = city["properties"][
                            "p_tot_never_married"]

                    elif (key == "sa4_name16"
                          and str(value).__contains__("Brisbane")):
                        married_females_bris = city["properties"][
                            "f_tot_married"]
                        unmarried_females_bris = city["properties"][
                            "f_tot_never_married"]
                        married_males_bris = city["properties"][
                            "m_tot_married"]
                        unmarried_males_bris = city["properties"][
                            "m_tot_never_married"]
                        married_persons_bris = city["properties"][
                            "p_tot_married"]
                        unmarried_persons_bris = city["properties"][
                            "p_tot_never_married"]

                    elif (key == "sa4_name16"
                          and str(value).__contains__("Darwin")):
                        married_females_dar = city["properties"][
                            "f_tot_married"]
                        unmarried_females_dar = city["properties"][
                            "f_tot_never_married"]
                        married_males_dar = city["properties"]["m_tot_married"]
                        unmarried_males_dar = city["properties"][
                            "m_tot_never_married"]
                        married_persons_dar = city["properties"][
                            "p_tot_married"]
                        unmarried_persons_dar = city["properties"][
                            "p_tot_never_married"]

                    elif (key == "sa4_name16"
                          and str(value).__contains__("Adelaide")):
                        married_females_ade = city["properties"][
                            "f_tot_married"]
                        unmarried_females_ade = city["properties"][
                            "f_tot_never_married"]
                        married_males_ade = city["properties"]["m_tot_married"]
                        unmarried_males_ade = city["properties"][
                            "m_tot_never_married"]
                        married_persons_ade = city["properties"][
                            "p_tot_married"]
                        unmarried_persons_ade = city["properties"][
                            "p_tot_never_married"]

                    elif (key == "sa4_name16" and str(value).__contains__(
                            "Australian Capital Territory")):
                        married_females_can = city["properties"][
                            "f_tot_married"]
                        unmarried_females_can = city["properties"][
                            "f_tot_never_married"]
                        married_males_can = city["properties"]["m_tot_married"]
                        unmarried_males_can = city["properties"][
                            "m_tot_never_married"]
                        married_persons_can = city["properties"][
                            "p_tot_married"]
                        unmarried_persons_can = city["properties"][
                            "p_tot_never_married"]

            filejson_gambling = open("../scripts/aurin/citydata_gambling.json",
                                     "r")
            data_gambling = json.load(filejson_gambling)

            city_list_gambling = data_gambling["features"]

            for city in city_list_gambling:
                for key, value in city["properties"].items():
                    if (key == "sa4_name16"
                            and str(value).__contains__("Melbourne")):
                        gambling_activities_melb = city["properties"][
                            "artsr_gambling_ac_p"]

                    elif (key == "sa4_name16"
                          and str(value).__contains__("Sydney")):
                        gambling_activities_syd = city["properties"][
                            "artsr_gambling_ac_p"]

                    elif (key == "sa4_name16"
                          and str(value).__contains__("Hobart")):
                        gambling_activities_hob = city["properties"][
                            "artsr_gambling_ac_p"]

                    elif (key == "sa4_name16"
                          and str(value).__contains__("Perth")):
                        gambling_activities_per = city["properties"][
                            "artsr_gambling_ac_p"]

                    elif (key == "sa4_name16"
                          and str(value).__contains__("Brisbane")):
                        gambling_activities_bris = city["properties"][
                            "artsr_gambling_ac_p"]

                    elif (key == "sa4_name16"
                          and str(value).__contains__("Darwin")):
                        gambling_activities_dar = city["properties"][
                            "artsr_gambling_ac_p"]

                    elif (key == "sa4_name16"
                          and str(value).__contains__("Adelaide")):
                        gambling_activities_ade = city["properties"][
                            "artsr_gambling_ac_p"]

                    elif (key == "sa4_name16" and str(value).__contains__(
                            "Australian Capital Territory")):
                        gambling_activities_can = city["properties"][
                            "artsr_gambling_ac_p"]

            city_list_gambling = data_gambling["features"]
            for city in city_list_gambling:
                for key, value in city["properties"].items():
                    if (key == "sa4_name16"
                            and str(value).__contains__("Melbourne")):
                        gambling_activities_melb = city["properties"][
                            "artsr_gambling_ac_p"]

                    elif (key == "sa4_name16"
                          and str(value).__contains__("Sydney")):
                        gambling_activities_syd = city["properties"][
                            "artsr_gambling_ac_p"]

                    elif (key == "sa4_name16"
                          and str(value).__contains__("Hobart")):
                        gambling_activities_hob = city["properties"][
                            "artsr_gambling_ac_p"]

                    elif (key == "sa4_name16"
                          and str(value).__contains__("Perth")):
                        gambling_activities_per = city["properties"][
                            "artsr_gambling_ac_p"]

                    elif (key == "sa4_name16"
                          and str(value).__contains__("Brisbane")):
                        gambling_activities_bris = city["properties"][
                            "artsr_gambling_ac_p"]

                    elif (key == "sa4_name16"
                          and str(value).__contains__("Darwin")):
                        gambling_activities_dar = city["properties"][
                            "artsr_gambling_ac_p"]

                    elif (key == "sa4_name16"
                          and str(value).__contains__("Adelaide")):
                        gambling_activities_ade = city["properties"][
                            "artsr_gambling_ac_p"]

                    elif (key == "sa4_name16" and str(value).__contains__(
                            "Australian Capital Territory")):
                        gambling_activities_can = city["properties"][
                            "artsr_gambling_ac_p"]

            filejson = open("../scripts/aurin/citydata_incomeage.json", "r")
            data_incomeage = json.load(filejson)

            city_list_age = data_incomeage["features"]
            for city in city_list_age:
                for key, value in city["properties"].items():
                    if (key == "sa4_name16"
                            and str(value).__contains__("Melbourne")):
                        median_age_melb = city["properties"][
                            "med_age_psns_tot"]
                        median_household_income_melb = city["properties"][
                            "med_hhd_inc_wk_tot"]

                    elif (key == "sa4_name16"
                          and str(value).__contains__("Sydney")):
                        median_age_syd = city["properties"]["med_age_psns_tot"]
                        median_household_income_syd = city["properties"][
                            "med_hhd_inc_wk_tot"]

                    elif (key == "sa4_name16"
                          and str(value).__contains__("Hobart")):
                        median_age_hob = city["properties"]["med_age_psns_tot"]
                        median_household_income_hob = city["properties"][
                            "med_hhd_inc_wk_tot"]

                    elif (key == "sa4_name16"
                          and str(value).__contains__("Perth")):
                        median_age_per = city["properties"]["med_age_psns_tot"]
                        median_household_income_per = city["properties"][
                            "med_hhd_inc_wk_tot"]

                    elif (key == "sa4_name16"
                          and str(value).__contains__("Brisbane")):
                        median_age_bris = city["properties"][
                            "med_age_psns_tot"]
                        median_household_income_bris = city["properties"][
                            "med_hhd_inc_wk_tot"]

                    elif (key == "sa4_name16"
                          and str(value).__contains__("Darwin")):
                        median_age_dar = city["properties"]["med_age_psns_tot"]
                        median_household_income_dar = city["properties"][
                            "med_hhd_inc_wk_tot"]

                    elif (key == "sa4_name16"
                          and str(value).__contains__("Adelaide")):
                        median_age_ade = city["properties"]["med_age_psns_tot"]
                        median_household_income_ade = city["properties"][
                            "med_hhd_inc_wk_tot"]

                    elif (key == "sa4_name16" and str(value).__contains__(
                            "Australian Capital Territory")):
                        median_age_can = city["properties"]["med_age_psns_tot"]
                        median_household_income_can = city["properties"][
                            "med_hhd_inc_wk_tot"]

            file = open("../scripts/aurin/citydata_birthcountries.json", "r")
            data = json.load(file)

            city_list = data["features"]
            for city in city_list:
                for key, value in city["properties"].items():
                    if (key == "sa4_name16"
                            and str(value).__contains__("Melbourne")):
                        north_america = city["properties"][
                            "united_states_america_p"] + city["properties"][
                                "canada_p"]
                        north_america_melb += north_america
                        africa = city["properties"]["egypt_p"] + city["properties"]["south_africa_p"] + \
                                 city["properties"][
                                     "zimbabwe_p"]
                        africa_melb += africa
                        australia_melb = city["properties"]["australia_p"]
                        new_zealand_melb = city["properties"]["new_zealand_p"]
                        born_elsewhere_melb = city["properties"][
                            "born_elsewhere_p"]
                        europe = city["properties"]["germany_p"] + city["properties"]["croatia_p"] + city["properties"][
                            "united_kingdom_ci_im_p"] + \
                                 city["properties"]["netherlands_p"] + city["properties"]["greece_p"] + \
                                 city["properties"][
                                     "ireland_p"] + \
                                 city["properties"]["italy_p"] + city["properties"]["turkey_p"] + city["properties"][
                                     "poland_p"] + \
                                 city["properties"]["fiji_p"] + city["properties"]["malta_p"]
                        europe_melb += europe

                        asia = city["properties"]["china_excl_sars_taiwan_p"] + city["properties"]["vietnam_p"] + \
                               city["properties"]["sri_lanka_p"] + \
                               city["properties"]["japan_p"] + city["properties"]["singapore_p"] + city["properties"][
                                   "malaysia_p"] + \
                               city["properties"]["philippines_p"] + city["properties"]["thailand_p"] + \
                               city["properties"][
                                   "hong_kong_sar_china_p"] + \
                               city["properties"]["india_p"] + city["properties"]["indonesia_p"] + city["properties"][
                                   "pakistan_p"] + city["properties"]["iraq_p"] + \
                               city["properties"]["lebanon_p"] + city["properties"]["korea_republic_south_p"]
                        asia_melb += asia
                        total_females_melb = city["properties"]["tot_f"]
                        total_males_melb = city["properties"]["tot_m"]
                        total_persons_melb = city["properties"]["tot_p"]

                    elif (key == "sa4_name16"
                          and str(value).__contains__("Sydney")):

                        north_america = city["properties"][
                            "united_states_america_p"] + city["properties"][
                                "canada_p"]
                        north_america_syd += north_america
                        africa = city["properties"]["egypt_p"] + city["properties"]["south_africa_p"] + \
                                 city["properties"][
                                     "zimbabwe_p"]
                        africa_syd += africa
                        australia_syd = city["properties"]["australia_p"]
                        new_zealand_syd = city["properties"]["new_zealand_p"]
                        born_elsewhere_syd = city["properties"][
                            "born_elsewhere_p"]
                        europe = city["properties"]["germany_p"] + city["properties"]["croatia_p"] + city["properties"][
                            "united_kingdom_ci_im_p"] + \
                                 city["properties"]["netherlands_p"] + city["properties"]["greece_p"] + \
                                 city["properties"][
                                     "ireland_p"] + \
                                 city["properties"]["italy_p"] + city["properties"]["turkey_p"] + city["properties"][
                                     "poland_p"] + \
                                 city["properties"]["fiji_p"] + city["properties"]["malta_p"]
                        europe_syd += europe

                        asia = city["properties"]["china_excl_sars_taiwan_p"] + city["properties"]["vietnam_p"] + \
                               city["properties"][
                                   "sri_lanka_p"] + \
                               city["properties"]["japan_p"] + city["properties"]["singapore_p"] + city["properties"][
                                   "malaysia_p"] + \
                               city["properties"]["philippines_p"] + city["properties"]["thailand_p"] + \
                               city["properties"][
                                   "hong_kong_sar_china_p"] + \
                               city["properties"]["india_p"] + city["properties"]["indonesia_p"] + city["properties"][
                                   "pakistan_p"] + \
                               city["properties"]["iraq_p"] + \
                               city["properties"]["lebanon_p"] + city["properties"]["korea_republic_south_p"]
                        asia_syd += asia
                        total_females_syd = city["properties"]["tot_f"]
                        total_males_syd = city["properties"]["tot_m"]
                        total_persons_syd = city["properties"]["tot_p"]
                    elif (key == "sa4_name16"
                          and str(value).__contains__("Hobart")):

                        north_america = city["properties"][
                            "united_states_america_p"] + city["properties"][
                                "canada_p"]
                        north_america_hob += north_america
                        africa = city["properties"]["egypt_p"] + city["properties"]["south_africa_p"] + \
                                 city["properties"][
                                     "zimbabwe_p"]
                        africa_hob += africa
                        australia_hob = city["properties"]["australia_p"]
                        new_zealand_hob = city["properties"]["new_zealand_p"]
                        born_elsewhere_hob = city["properties"][
                            "born_elsewhere_p"]
                        europe = city["properties"]["germany_p"] + city["properties"]["croatia_p"] + city["properties"][
                            "united_kingdom_ci_im_p"] + \
                                 city["properties"]["netherlands_p"] + city["properties"]["greece_p"] + \
                                 city["properties"][
                                     "ireland_p"] + \
                                 city["properties"]["italy_p"] + city["properties"]["turkey_p"] + city["properties"][
                                     "poland_p"] + \
                                 city["properties"]["fiji_p"] + city["properties"]["malta_p"]
                        europe_hob += europe

                        asia = city["properties"]["china_excl_sars_taiwan_p"] + city["properties"]["vietnam_p"] + \
                               city["properties"][
                                   "sri_lanka_p"] + \
                               city["properties"]["japan_p"] + city["properties"]["singapore_p"] + city["properties"][
                                   "malaysia_p"] + \
                               city["properties"]["philippines_p"] + city["properties"]["thailand_p"] + \
                               city["properties"][
                                   "hong_kong_sar_china_p"] + \
                               city["properties"]["india_p"] + city["properties"]["indonesia_p"] + city["properties"][
                                   "pakistan_p"] + \
                               city["properties"]["iraq_p"] + \
                               city["properties"]["lebanon_p"] + city["properties"]["korea_republic_south_p"]
                        asia_hob += asia
                        total_females_hob = city["properties"]["tot_f"]
                        total_males_hob = city["properties"]["tot_m"]
                        total_persons_hob = city["properties"]["tot_p"]

                    elif (key == "sa4_name16"
                          and str(value).__contains__("Perth")):

                        north_america = city["properties"][
                            "united_states_america_p"] + city["properties"][
                                "canada_p"]
                        north_america_per += north_america
                        africa = city["properties"]["egypt_p"] + city["properties"]["south_africa_p"] + \
                                 city["properties"][
                                     "zimbabwe_p"]
                        africa_per += africa
                        australia_per = city["properties"]["australia_p"]
                        new_zealand_per = city["properties"]["new_zealand_p"]
                        born_elsewhere_per = city["properties"][
                            "born_elsewhere_p"]
                        europe = city["properties"]["germany_p"] + city["properties"]["croatia_p"] + city["properties"][
                            "united_kingdom_ci_im_p"] + \
                                 city["properties"]["netherlands_p"] + city["properties"]["greece_p"] + \
                                 city["properties"][
                                     "ireland_p"] + \
                                 city["properties"]["italy_p"] + city["properties"]["turkey_p"] + city["properties"][
                                     "poland_p"] + \
                                 city["properties"]["fiji_p"] + city["properties"]["malta_p"]
                        europe_per += europe

                        asia = city["properties"]["china_excl_sars_taiwan_p"] + city["properties"]["vietnam_p"] + \
                               city["properties"][
                                   "sri_lanka_p"] + \
                               city["properties"]["japan_p"] + city["properties"]["singapore_p"] + city["properties"][
                                   "malaysia_p"] + \
                               city["properties"]["philippines_p"] + city["properties"]["thailand_p"] + \
                               city["properties"][
                                   "hong_kong_sar_china_p"] + \
                               city["properties"]["india_p"] + city["properties"]["indonesia_p"] + city["properties"][
                                   "pakistan_p"] + \
                               city["properties"]["iraq_p"] + \
                               city["properties"]["lebanon_p"] + city["properties"]["korea_republic_south_p"]
                        asia_per += asia
                        total_females_per = city["properties"]["tot_f"]
                        total_males_per = city["properties"]["tot_m"]
                        total_persons_per = city["properties"]["tot_p"]

                    elif (key == "sa4_name16"
                          and str(value).__contains__("Brisbane")):

                        north_america = city["properties"][
                            "united_states_america_p"] + city["properties"][
                                "canada_p"]
                        north_america_bris += north_america
                        africa = city["properties"]["egypt_p"] + city["properties"]["south_africa_p"] + \
                                 city["properties"][
                                     "zimbabwe_p"]
                        africa_bris += africa
                        australia_bris = city["properties"]["australia_p"]
                        new_zealand_bris = city["properties"]["new_zealand_p"]
                        born_elsewhere_bris = city["properties"][
                            "born_elsewhere_p"]
                        europe = city["properties"]["germany_p"] + city["properties"]["croatia_p"] + city["properties"][
                            "united_kingdom_ci_im_p"] + \
                                 city["properties"]["netherlands_p"] + city["properties"]["greece_p"] + \
                                 city["properties"][
                                     "ireland_p"] + \
                                 city["properties"]["italy_p"] + city["properties"]["turkey_p"] + city["properties"][
                                     "poland_p"] + \
                                 city["properties"]["fiji_p"] + city["properties"]["malta_p"]
                        europe_bris += europe

                        asia = city["properties"]["china_excl_sars_taiwan_p"] + city["properties"]["vietnam_p"] + \
                               city["properties"][
                                   "sri_lanka_p"] + \
                               city["properties"]["japan_p"] + city["properties"]["singapore_p"] + city["properties"][
                                   "malaysia_p"] + \
                               city["properties"]["philippines_p"] + city["properties"]["thailand_p"] + \
                               city["properties"][
                                   "hong_kong_sar_china_p"] + \
                               city["properties"]["india_p"] + city["properties"]["indonesia_p"] + city["properties"][
                                   "pakistan_p"] + \
                               city["properties"]["iraq_p"] + \
                               city["properties"]["lebanon_p"] + city["properties"]["korea_republic_south_p"]
                        asia_bris += asia
                        total_females_bris = city["properties"]["tot_f"]
                        total_males_bris = city["properties"]["tot_m"]
                        total_persons_bris = city["properties"]["tot_p"]

                    elif (key == "sa4_name16"
                          and str(value).__contains__("Darwin")):

                        north_america = city["properties"][
                            "united_states_america_p"] + city["properties"][
                                "canada_p"]
                        north_america_dar += north_america
                        africa = city["properties"]["egypt_p"] + city["properties"]["south_africa_p"] + \
                                 city["properties"][
                                     "zimbabwe_p"]
                        africa_dar += africa
                        australia_dar = city["properties"]["australia_p"]
                        new_zealand_dar = city["properties"]["new_zealand_p"]
                        born_elsewhere_dar = city["properties"][
                            "born_elsewhere_p"]
                        europe = city["properties"]["germany_p"] + city["properties"]["croatia_p"] + city["properties"][
                            "united_kingdom_ci_im_p"] + \
                                 city["properties"]["netherlands_p"] + city["properties"]["greece_p"] + \
                                 city["properties"][
                                     "ireland_p"] + \
                                 city["properties"]["italy_p"] + city["properties"]["turkey_p"] + city["properties"][
                                     "poland_p"] + \
                                 city["properties"]["fiji_p"] + city["properties"]["malta_p"]
                        europe_dar += europe

                        asia = city["properties"]["china_excl_sars_taiwan_p"] + city["properties"]["vietnam_p"] + \
                               city["properties"][
                                   "sri_lanka_p"] + \
                               city["properties"]["japan_p"] + city["properties"]["singapore_p"] + city["properties"][
                                   "malaysia_p"] + \
                               city["properties"]["philippines_p"] + city["properties"]["thailand_p"] + \
                               city["properties"][
                                   "hong_kong_sar_china_p"] + \
                               city["properties"]["india_p"] + city["properties"]["indonesia_p"] + city["properties"][
                                   "pakistan_p"] + \
                               city["properties"]["iraq_p"] + \
                               city["properties"]["lebanon_p"] + city["properties"]["korea_republic_south_p"]
                        asia_dar += asia
                        total_females_dar = city["properties"]["tot_f"]
                        total_males_dar = city["properties"]["tot_m"]
                        total_persons_dar = city["properties"]["tot_p"]

                    elif (key == "sa4_name16"
                          and str(value).__contains__("Adelaide")):

                        north_america = city["properties"][
                            "united_states_america_p"] + city["properties"][
                                "canada_p"]
                        north_america_ade += north_america
                        africa = city["properties"]["egypt_p"] + city["properties"]["south_africa_p"] + \
                                 city["properties"][
                                     "zimbabwe_p"]
                        africa_ade += africa
                        australia_ade = city["properties"]["australia_p"]
                        new_zealand_ade = city["properties"]["new_zealand_p"]
                        born_elsewhere_ade = city["properties"][
                            "born_elsewhere_p"]
                        europe = city["properties"]["germany_p"] + city["properties"]["croatia_p"] + city["properties"][
                            "united_kingdom_ci_im_p"] + \
                                 city["properties"]["netherlands_p"] + city["properties"]["greece_p"] + \
                                 city["properties"][
                                     "ireland_p"] + \
                                 city["properties"]["italy_p"] + city["properties"]["turkey_p"] + city["properties"][
                                     "poland_p"] + \
                                 city["properties"]["fiji_p"] + city["properties"]["malta_p"]
                        europe_ade += europe

                        asia = city["properties"]["china_excl_sars_taiwan_p"] + city["properties"]["vietnam_p"] + \
                               city["properties"][
                                   "sri_lanka_p"] + \
                               city["properties"]["japan_p"] + city["properties"]["singapore_p"] + city["properties"][
                                   "malaysia_p"] + \
                               city["properties"]["philippines_p"] + city["properties"]["thailand_p"] + \
                               city["properties"][
                                   "hong_kong_sar_china_p"] + \
                               city["properties"]["india_p"] + city["properties"]["indonesia_p"] + city["properties"][
                                   "pakistan_p"] + \
                               city["properties"]["iraq_p"] + \
                               city["properties"]["lebanon_p"] + city["properties"]["korea_republic_south_p"]
                        asia_ade += asia
                        total_females_ade = city["properties"]["tot_f"]
                        total_males_ade = city["properties"]["tot_m"]
                        total_persons_ade = city["properties"]["tot_p"]

                    elif (key == "sa4_name16" and str(value).__contains__(
                            "Australian Capital Territory")):

                        north_america = city["properties"][
                            "united_states_america_p"] + city["properties"][
                                "canada_p"]
                        north_america_can += north_america
                        africa = city["properties"]["egypt_p"] + city["properties"]["south_africa_p"] + \
                                 city["properties"][
                                     "zimbabwe_p"]
                        africa_can += africa
                        australia_can = city["properties"]["australia_p"]
                        new_zealand_can = city["properties"]["new_zealand_p"]
                        born_elsewhere_can = city["properties"][
                            "born_elsewhere_p"]
                        europe = city["properties"]["germany_p"] + city["properties"]["croatia_p"] + city["properties"][
                            "united_kingdom_ci_im_p"] + \
                                 city["properties"]["netherlands_p"] + city["properties"]["greece_p"] + \
                                 city["properties"][
                                     "ireland_p"] + \
                                 city["properties"]["italy_p"] + city["properties"]["turkey_p"] + city["properties"][
                                     "poland_p"] + \
                                 city["properties"]["fiji_p"] + city["properties"]["malta_p"]
                        europe_can += europe

                        asia = city["properties"]["china_excl_sars_taiwan_p"] + city["properties"]["vietnam_p"] + \
                               city["properties"]["sri_lanka_p"] + \
                               city["properties"]["japan_p"] + city["properties"]["singapore_p"] + city["properties"][
                                   "malaysia_p"] + \
                               city["properties"]["philippines_p"] + city["properties"]["thailand_p"] + \
                               city["properties"][
                                   "hong_kong_sar_china_p"] + \
                               city["properties"]["india_p"] + city["properties"]["indonesia_p"] + city["properties"][
                                   "pakistan_p"] + city["properties"]["iraq_p"] + \
                               city["properties"]["lebanon_p"] + city["properties"]["korea_republic_south_p"]
                        asia_can += asia
                        total_females_can = city["properties"]["tot_f"]
                        total_males_can = city["properties"]["tot_m"]
                        total_persons_can = city["properties"]["tot_p"]

            try:
                db = database.DButils()
                parser = Parser()
                # parse aurin data
                record = parser.parse_aurin(
                    "aurin1", "Melbourne", total_persons_melb,
                    total_males_melb, total_females_melb, asia_melb,
                    europe_melb, australia_melb, new_zealand_melb, africa_melb,
                    north_america_melb, born_elsewhere_melb, median_age_melb,
                    median_household_income_melb, gambling_activities_melb,
                    married_females_melb, unmarried_females_melb,
                    married_males_melb, unmarried_males_melb,
                    married_persons_melb, unmarried_persons_melb)
                record1 = parser.parse_aurin(
                    "aurin2", "Sydney", total_persons_syd, total_males_syd,
                    total_females_syd, asia_syd, europe_syd, australia_syd,
                    new_zealand_syd, africa_syd, north_america_syd,
                    born_elsewhere_syd, median_age_syd,
                    median_household_income_syd, gambling_activities_syd,
                    married_females_syd, unmarried_females_syd,
                    married_males_syd, unmarried_males_syd,
                    married_persons_syd, unmarried_persons_syd)
                record2 = parser.parse_aurin(
                    "aurin3", "Brisbane", total_persons_bris, total_males_bris,
                    total_females_bris, asia_bris, europe_bris, australia_bris,
                    new_zealand_bris, africa_bris, north_america_bris,
                    born_elsewhere_bris, median_age_bris,
                    median_household_income_bris, gambling_activities_bris,
                    married_females_bris, unmarried_females_bris,
                    married_males_bris, unmarried_males_bris,
                    married_persons_bris, unmarried_persons_bris)
                record3 = parser.parse_aurin(
                    "aurin4", "Darwin", total_persons_dar, total_males_dar,
                    total_females_dar, asia_dar, europe_dar, australia_dar,
                    new_zealand_dar, africa_dar, north_america_dar,
                    born_elsewhere_dar, median_age_dar,
                    median_household_income_dar, gambling_activities_dar,
                    married_females_dar, unmarried_females_dar,
                    married_males_dar, unmarried_males_dar,
                    married_persons_dar, unmarried_persons_dar)
                record4 = parser.parse_aurin(
                    "aurin5", "Adelaide", total_persons_ade, total_males_ade,
                    total_females_ade, asia_ade, europe_ade, australia_ade,
                    new_zealand_ade, africa_ade, north_america_ade,
                    born_elsewhere_ade, median_age_ade,
                    median_household_income_ade, gambling_activities_ade,
                    married_females_ade, unmarried_females_ade,
                    married_males_ade, unmarried_males_ade,
                    married_persons_ade, unmarried_persons_ade)
                record5 = parser.parse_aurin(
                    "aurin6", "Hobart", total_persons_hob, total_males_hob,
                    total_females_hob, asia_hob, europe_hob, australia_hob,
                    new_zealand_hob, africa_hob, north_america_hob,
                    born_elsewhere_hob, median_age_hob,
                    median_household_income_hob, gambling_activities_hob,
                    married_females_hob, unmarried_females_hob,
                    married_males_hob, unmarried_males_hob,
                    married_persons_hob, unmarried_persons_hob)
                record6 = parser.parse_aurin(
                    "aurin7", "Canberra", total_persons_can, total_males_can,
                    total_females_can, asia_can, europe_can, australia_can,
                    new_zealand_can, africa_can, north_america_can,
                    born_elsewhere_can, median_age_can,
                    median_household_income_can, gambling_activities_can,
                    married_females_can, unmarried_females_can,
                    married_males_can, unmarried_males_can,
                    married_persons_can, unmarried_persons_can)
                record7 = parser.parse_aurin(
                    "aurin8", "Perth", total_persons_per, total_males_per,
                    total_females_per, asia_per, europe_per, australia_per,
                    new_zealand_per, africa_per, north_america_per,
                    born_elsewhere_per, median_age_per,
                    median_household_income_per, gambling_activities_per,
                    married_females_per, unmarried_females_per,
                    married_males_per, unmarried_males_per,
                    married_persons_per, unmarried_persons_per)

                # save into couchdb
                db.save(aurin_db_name, record)
                db.save(aurin_db_name, record1)
                db.save(aurin_db_name, record2)
                db.save(aurin_db_name, record3)
                db.save(aurin_db_name, record4)
                db.save(aurin_db_name, record5)
                db.save(aurin_db_name, record6)
                db.save(aurin_db_name, record7)

            except Exception as e:
                print(e)

        except Exception as e:
            print(e)
コード例 #4
0
# Class: COMP90024 ASSIGNMENT 2 - Semester 2, 2019
# Member 1: Naiyun Wu - 1008438
# Member 2: Kuldeep Suhag - 919397
# Member 3: Hongtao Ni - 938737
# Member 4: Duoyi Zhang - 956812
# Member 5: Zexian Huang - 1012710

import json

from database.parser import Parser



# Location of the Instagram dump that is fed to Parser line by line.
# The original literal started with a stray space, which turned the absolute
# path into a relative one that cannot be opened.
instagram_file_name = "/Users/kuldeepsuhag/Desktop/Assignment2/COMP90024-ASSIGNMENT-2/Crawler/harvestor/instagram.json"


# Read the dump one line at a time and hand every decodable record to Parser.
# NOTE(review): presumably each record line ends with ",\n" (or just "\n" for
# the last record) -- confirm against the harvester's output format.
with open(instagram_file_name, encoding="utf8") as f:
    for line in f:
        # First drop the last two characters; if that is not valid JSON,
        # retry dropping only the last character.
        for trim in (2, 1):
            try:
                data = json.loads(line[:-trim])
            except json.JSONDecodeError:
                continue
            break
        else:
            # Neither trimmed form is valid JSON: skip this line.
            continue

        try:
            Parser(data)
        except Exception as e:
            # Stay best-effort (one bad record must not abort the import),
            # but report the failure instead of swallowing it silently.
            print(e)

コード例 #5
0
import os

import requests

from app.models.Actor import ActorORM
from app.models.Country import CountryORM
from app.models.Film import FilmORM
from app.models.Genre import GenreORM
from database.parser import Parser
from django.core.files.images import ImageFile
from django.core.management.base import BaseCommand
from django.db import transaction

# Module-level Parser instance; generate_genres() below reads its data
# through parser.get_genres().
parser = Parser()


class Command(BaseCommand):
    help = 'Closes the specified poll for voting'

    @transaction.atomic()
    def handle(self, *args, **options):
        """Run all data-generation steps, in order, under transaction.atomic().

        The steps run sequentially: genres, countries, actors, films and
        finally images. ``args`` and ``options`` are accepted but not used.
        """
        pipeline = (
            "generate_genres",
            "generate_countries",
            "generate_actors",
            "generate_films",
            "set_images",
        )
        # Resolve each step only when its turn comes, then invoke it.
        for step_name in pipeline:
            getattr(self, step_name)()

    def generate_genres(self):
        genres = parser.get_genres()
        print("GENERATE_GENRES ", len(genres))