Exemple #1
0
    def do_POST(self):
        """Handle a webhook POST and relay each pushed commit message.

        The request body is read in full and kept on ``self.data_string``.
        Only a request whose path is exactly ``/deploy`` is processed: the
        percent-encoded ``payload=<json>`` form field is decoded and, for
        every entry of its ``commits`` list, the commit message is passed to
        ``comment`` and ``send_weibo``.  Any other path is answered with a
        404 and the body is not acted upon.

        Raises:
            ValueError: if a ``/deploy`` body has no ``payload=`` field or
                the payload is not valid JSON.
            KeyError: if the payload has no ``commits`` entry or a commit
                has no ``message``.
        """
        # A missing Content-Length header is treated as an empty body.
        length = int(self.headers.get('Content-Length') or 0)
        self.data_string = self.rfile.read(length)
        print(self.data_string)
        print(self.requestline)

        # The request line has the form "<METHOD> <PATH> <VERSION>".
        parts = self.requestline.split()
        is_deploy = len(parts) > 1 and parts[1] == '/deploy'

        self.send_response(200 if is_deploy else 404)
        self.send_header('Content-type', 'text/html')
        self.end_headers()
        if not is_deploy:
            # Unknown endpoint: the 404 has been sent, so stop here instead
            # of posting comments on behalf of an unrelated request.
            return

        # Undo percent-encoding (e.g. %7B -> "{").
        try:
            from urllib import unquote  # Python 2
        except ImportError:
            from urllib.parse import unquote  # Python 3
        rawstr = unquote(self.data_string.decode('utf8'))

        # Split on the FIRST "payload=" only, so the same text appearing
        # inside the JSON document does not break the parsing.
        _, found, json_str = rawstr.partition('payload=')
        if not found:
            raise ValueError("request body has no 'payload=' field")
        data = json.loads(json_str)

        for commit in data['commits']:
            # Form encoding turns spaces into '+'.
            txt = commit['message'].replace('+', ' ')
            print(txt)
            comment(txt + " --auto sync")
            send_weibo(
                txt +
                "https://esl.hohoweiya.xyz Auto Sync https://esl.szcfweiya.cn")
Exemple #2
0
def main():
    """Poll a YouTube channel forever and comment on every new upload.

    Looks up ``CHANNEL`` once, then repeatedly asks the ``YouTube`` client
    for the latest upload since ``start_time``.  For each upload found it
    prints a notice, posts ``COMMENT_TEXT`` through ``comment`` and moves
    ``start_time`` forward.  Any exception raised while handling an upload
    is printed and followed by a 100 second pause.  Never returns.
    """
    print("Starting...")
    client = YouTube()
    channel = client.get_channel(CHANNEL)

    print("Waiting for new videos...")
    # NOTE(review): starts out as an asctime() string but is replaced by a
    # time.time() float below -- confirm which form get_latest_upload expects.
    start_time = time.asctime()

    while True:
        upload = client.get_latest_upload(channel, start_time)
        if not upload:
            continue

        try:
            url = 'https://youtube.com/watch?v={}'.format(upload['id'])
            # NOTE(review): the template has three slots, so ``url`` is
            # passed to format() but never shown.
            announcement = '{} | {} uploaded a new video titled "{}"'.format(
                time.asctime(), upload['channel_title'], upload['title'], url)
            print(announcement)

            posted = comment(client.api, upload['id'], COMMENT_TEXT)
            if posted:
                confirmation = (
                    '{} | "{}" was commented on {}\'s new video "{}"'.format(
                        time.asctime(), COMMENT_TEXT,
                        upload['channel_title'], upload['title']))
                print(confirmation)

            start_time = time.time()
        except Exception as e:
            # If it reaches the 100 seconds api threshold, wait for 100 seconds
            print("Error: Too many requests:\n{}".format(e))
            print("Waiting 100 seconds..")
            time.sleep(100)
    def get_all_user_comments(username):
        """Return every stored comment written by ``username``.

        Runs a SELECT against the ``comments`` table through
        ``DatabaseManager._execute_robust`` and wraps each returned row in a
        ``comment`` object.

        Args:
            username: name to match against the ``username`` column.

        Returns:
            A list of ``comment`` objects, one per matching row (empty when
            nothing matches).
        """
        cur_connection = DatabaseManager.get_connection()
        cursor = cur_connection.cursor()

        # The statement is assembled as text, so double every single quote to
        # keep the value inside its SQL string literal.
        # NOTE(review): switch to bound parameters if _execute_robust accepts
        # them -- its signature is not visible from here.
        quoted_username = username.replace("'", "''")

        # The WHERE clause previously contained a fixed literal, which made
        # the .format() call a no-op and ignored the ``username`` argument.
        result = DatabaseManager._execute_robust(
            cursor, '''
                    SELECT comment_id,
                    post_id, username,
                    parent_comment, comment_karma,
                    comment_date, subreddit
                    FROM comments WHERE username='{username}'
                '''.format(username=quoted_username))

        # Column order follows the SELECT list above.
        return [
            comment(comment_id=row[0],
                    post_id=row[1],
                    username=row[2],
                    parent_comment=row[3],
                    comment_karma=row[4],
                    comment_date=row[5],
                    subreddit=row[6])
            for row in result
        ]
Exemple #4
0
	def addComment(self, msg):
		"""Create a ``comment`` for ``msg`` and append it to ``self.comments``.

		The comment is built with the constant 1184 and ``self.i * 25``
		(presumably an x/y position -- confirm against the ``comment``
		class) plus ``self.font``.  ``self.i`` then advances by one and
		wraps back to 0 once it reaches 20.
		"""
		entry = comment(msg, 1184, self.i * 25, self.font)
		following = self.i + 1
		self.i = 0 if following >= 20 else following
		self.comments.append(entry)
Exemple #5
0
def reddit_scraper(my_url):
    '''
    Download a reddit page and return its entries as a flat list of comment
    objects; per the original note the first element is the post itself and
    the rest are the comments about the news article.

    Every <p> inside each "entry unvoted" div is inspected, walking the divs
    in reverse page order.  A paragraph containing a parenthesised group
    closes the entry collected so far (its author/body pair is inserted at
    the front of the result); any other paragraph either names the
    submitter or is appended to the running body text.
    '''
    page = urllib.urlopen(my_url)
    page_html = page.read()
    page.close()
    page_soup = soup(page_html, "html.parser")
    entries = page_soup.findAll("div", {"class": "entry unvoted"})
    paren_group = re.compile(".*?\((.*?)\)")
    body = ''
    author = ""
    children = []
    for entry in entries[::-1]:
        for paragraph in entry.findAll("p"):
            result = paragraph.text
            groups = re.findall(paren_group, result)
            d = [[ch.encode("utf-8", "ignore") for ch in grp] for grp in groups]
            # Paragraph text with every (...) / [...] span removed, as bytes.
            stripped = re.sub("[\(\[].*?[\)\]]", "",
                              result).encode("utf-8", "ignore")

            if len(d) == 0:
                words = stripped.split()
                if len(words) > 0 and "submitted" == words[0]:
                    # "submitted ... <name>": the last word is the author.
                    author = words[len(words) - 1]
                else:
                    body += stripped
                continue

            # A parenthesised group marks the end of the current entry.
            print("AUTHOR: " + author)
            print("BODY: " + body)
            children.insert(0, comment(author, body))
            body = ""
            author = ""

            first_group = ''.join(d[0])
            if first_group[0].strip().isdigit():  # is children a digit?
                # NOT USING THIS ANYMORE, MAYBE IN THE FUTURE
                # We are not implementing a hierarchy, instead we are just
                # saying that all comments are on the same level.
                kids = int(first_group.split()[0])
            else:
                # else, we know that this is the end of the page
                # (beginning I should say)
                author = first_group

            if len(stripped) > 0:
                author = stripped.split()[0]

    return children
Exemple #6
0
 def addChildren(self, parent, db_comment_list):
     """Attach to ``parent`` every descendant found in ``db_comment_list``.

     Each database row whose ``parent_id`` equals ``parent.comment_id`` is
     turned into a ``comment`` object, added to ``parent`` with
     ``addChild`` and then processed recursively so that its own replies
     are attached as well.
     """
     for row in db_comment_list:
         # Skip rows that are not direct replies to ``parent``.
         if not (parent.comment_id == row.parent_id):
             continue
         reply = comment(row.comment_id, row.name, row.text, row.date)
         parent.addChild(reply)
         # Depth-first: gather the replies to this reply before moving on.
         self.addChildren(reply, db_comment_list)
Exemple #7
0
    def load(self, db_comment_list):
        """Build the comment tree from the flat list of database comments.

        A throw-away root comment with ``comment_id`` 0 collects, through
        ``addChildren``, every comment together with its replies.  The
        root's direct children are stored in ``self.comments`` sorted by
        ascending ``comment_id`` and that sorted list is returned.
        """
        # Placeholder parent for the top-level comments.
        root = comment(0, "", "", "")
        self.addChildren(root, db_comment_list)

        top_level = root.children
        self.comments = top_level

        # Ascending id order, so that the oldest comments appear first.
        ordered = sorted(top_level, key=lambda c: c.comment_id)
        self.comments = ordered
        return ordered
Exemple #8
0
 def get_annotations(self, cap=None):
     '''Collect the annotations of this user into self.annotations.

     Starting from page 1 of the profile's annotation listing, each page is
     downloaded and parsed, one comment object is appended per annotation
     (followed by a call to its get_full_history()), and the "next_page"
     link is followed until there is none.  While self.login is still
     unset, the first matching element of each page is also used to fill
     it in.

     Per the original note, if cap is given only the most recent
     annotations are collected, otherwise all of them are fetched (which
     requires <Number of annotations>/10 calls).
     NOTE(review): cap is not referenced anywhere in this method body.
     '''
     next_page = self.host + "/annotations/for_profile_page?page=1&user_id={user_id}".format(user_id=self.rg_id)
     # Reset the result list; it is filled page by page below.
     self.annotations = []
     while next_page != None:
         print next_page
         r = requests.get(next_page)
         soup = BeautifulSoup(r.content)
         if self.login == None:
             # We don't have the login yet, so attempt to scrape it here.
             # Three selectors are tried in turn; IndexError means "no match".
             try:
                 # This works if there are no annotations.
                 self.login = soup.select(".empty_message")[0].text.strip().replace(u" hasn't annotated any lines!", '')
             except IndexError:
                 try:
                     # This should work if there are annotations (maybe there are other "a" classes for some users, but none were found).
                     self.login = soup.select('div.annotation_unit_label a.community_contributor')[0].text
                 except IndexError:
                     try:
                         # This one is for staff: the login is the last path segment of the link.
                         self.login = soup.select('div.annotation_unit a.login')[0].attrs['href'].split('/')[-1]
                     except IndexError:
                         # Giving up; self.login stays None and is retried on the next page.
                         pass
         for annotation in soup.select("div.stand_alone_annotation_container"):
             annotation_id = annotation.select("div.annotation_unit")[0].get("data-id")
             # print "id:", annotation_id
             annotation_content = annotation.find(attrs={'class':'annotation_body'}).text
             try:
                 song_link = annotation.find('a', attrs={'class':'title'}).get('href')
             except AttributeError:
                 # No title link inside the container (find() returned None):
                 # the alternative rendering puts it in a prior div, try there.
                 try:
                     song_link = annotation.findPrevious('div', attrs={'class':'stand_alone_referent'}).find('a', attrs={'class':'title'}).get('href')
                 except:
                     # Print the annotation id and give up on this annotation.
                     print "**couldn't grab annotation", annotation_id,"from user", self.rg_id, "moving on"
                     continue
             self.annotations.append(comment(rg_id=annotation_id, text=annotation_content, song=song_link))
             # Called on the comment object that was just appended.
             self.annotations[-1].get_full_history()
         pagination_block = soup.find("div", attrs={"class":"pagination"})
         try:
             next_page = pagination_block.find(attrs={'class':"next_page"}).get('href') # on the last page this gives None
             if next_page != None:
                 # The link is relative, so prefix the host.
                 next_page = self.host + next_page
         except AttributeError, err:
             # If we're here there is no pagination block (or no next_page
             # element in it); per the original note this happens when there
             # is exactly one page of annotations, so we are done.
             next_page = None
Exemple #9
0
    def fetchCommentMetaRecent(subreddit, limit=100):
        """Collect metadata for the most recent comments of a subreddit.

        Args:
            subreddit: name of the subreddit to read.
            limit: maximum number of comments to request (default 100).

        Returns:
            A list of ``comment`` objects.  Fetching is best effort: if an
            error occurs while reading the listing, a warning is logged and
            the comments gathered up to that point are returned.
        """
        import logging

        sub = RedditManager.get_connection().subreddit(subreddit)
        commentlist = sub.comments(limit=limit)

        meta_list = []
        try:
            for temp_comment in commentlist:
                # A comment without an author object used to raise
                # AttributeError here, which silently ended the whole loop.
                # Use the same placeholder as fetchCommentMeta instead.
                if temp_comment.author is None:
                    username = "******"
                else:
                    username = temp_comment.author.name

                # We need to split here, because the incoming id uses the
                # fullname syntax ("<type>_<id>"); only a "t1" parent is
                # recorded as a parent comment.
                parent_comment = None
                fullname_parts = temp_comment.parent_id.split('_')
                if fullname_parts[0] == 't1':
                    parent_comment = fullname_parts[1]

                meta_list.append(
                    comment(comment_id=temp_comment.id,
                            post_id=temp_comment.submission.id,
                            username=username,
                            parent_comment=parent_comment,
                            comment_karma=temp_comment.score,
                            comment_date=temp_comment.created_utc,
                            subreddit=str(temp_comment.subreddit)))
        except Exception:
            # Keep the partial result, but leave a trace of what went wrong.
            # KeyboardInterrupt and SystemExit are no longer swallowed.
            logging.getLogger(__name__).warning(
                "stopped reading recent comments of %r after %d item(s)",
                subreddit, len(meta_list), exc_info=True)

        return meta_list
    def get_all_comments(dateLimit=None):
        """Return every row of the ``comments`` table as a ``comment`` object.

        The first seven columns of each row are passed positionally to
        ``comment``.  ``dateLimit`` is accepted but not used by this body.
        """
        connection = DatabaseManager.get_connection()
        rows = DatabaseManager._execute_robust(connection.cursor(),
                                               'SELECT * FROM comments')
        return [
            comment(row[0], row[1], row[2], row[3], row[4], row[5], row[6])
            for row in rows
        ]
Exemple #11
0
    def fetchCommentMeta(id):
        """Fetch one reddit comment by id and return its metadata.

        The result is a ``comment`` object carrying the comment id, the id of
        its submission, the author name (``"******"`` when the comment has no
        author), the parent comment id (``None`` unless the parent's fullname
        starts with ``t1``), score, creation time and subreddit name.
        """
        reddit_comment = RedditManager.get_connection().comment(id=id)

        comment_id = reddit_comment.id
        post_id = reddit_comment.submission.id

        author = reddit_comment.author
        username = "******" if author is None else author.name

        # The parent id uses the fullname syntax "<type>_<id>", so it has to
        # be split before the type prefix can be checked.
        fullname_parts = reddit_comment.parent_id.split('_')
        parent_comment = (fullname_parts[1]
                          if fullname_parts[0] == 't1' else None)

        return comment(comment_id=comment_id,
                       post_id=post_id,
                       username=username,
                       parent_comment=parent_comment,
                       comment_karma=reddit_comment.score,
                       comment_date=reddit_comment.created_utc,
                       subreddit=str(reddit_comment.subreddit))
Exemple #12
0
    def commentElement_create(self, **kwargs):
        """
        Populate the comment component with simulated comments/conversations.

        A sample ``comment.comment()`` is built with
        ``contents_rikeripsumBuild`` and each node listed at the root of its
        ``contents`` tree is grafted into ``self._stree``.

        :param kwargs: ``root`` -- path to ``cd`` into in ``self._stree``
                       before grafting (default ``'/'``);
                       ``conversations`` -- value forwarded to
                       ``contents_rikeripsumBuild`` (default ``1``).
        :return: ``dict`` built from the sample's ``contents.snode_root``.
        """
        # dict.get works on Python 2 and 3; the former scan over
        # kwargs.iteritems() fails with AttributeError on Python 3.
        str_root        = kwargs.get('root', '/')
        conversations   = kwargs.get('conversations', 1)
        s               = self._stree

        sample     = comment.comment()
        sample.contents_rikeripsumBuild(conversations=conversations)

        s.cd(str_root)
        l_comment   = sample.contents.lstr_lsnode('/')
        # Graft only when the sample tree could be positioned at its root.
        if sample.contents.cd('/')['status']:
            for c in l_comment:
                s.graft(sample.contents, '/%s' % c)

        return(dict(sample.contents.snode_root))
Exemple #13
0
    def commentElement_create(self, **kwargs):
        """
        Populate the comment component with simulated comments/conversations.

        Builds a sample ``comment.comment()`` via ``contents_rikeripsumBuild``
        and grafts every node listed at the root of its ``contents`` tree
        into ``self._stree``.

        :param kwargs: optional ``root`` (path to ``cd`` into in
                       ``self._stree`` first, default ``'/'``) and
                       ``conversations`` (forwarded to
                       ``contents_rikeripsumBuild``, default ``1``).
        :return: ``dict`` built from the sample's ``contents.snode_root``.
        """
        s = self._stree
        # Read the two supported options directly: kwargs.iteritems() only
        # exists on Python 2, while dict.get behaves the same on 2 and 3.
        str_root = kwargs.get('root', '/')
        conversations = kwargs.get('conversations', 1)

        sample = comment.comment()
        sample.contents_rikeripsumBuild(conversations=conversations)

        s.cd(str_root)
        l_comment = sample.contents.lstr_lsnode('/')
        # Nothing is grafted unless the sample tree reports a successful cd.
        if sample.contents.cd('/')['status']:
            for c in l_comment:
                s.graft(sample.contents, '/%s' % c)

        return (dict(sample.contents.snode_root))
Exemple #14
0
def get_all_requests(campid):
    """Yield one populated ``request`` object per request of a campaign.

    The campaign key is resolved with ``get_campaign_key`` and every row of
    the ``Request`` table with that ``campaignKey`` is read through the
    module-level ``cursor``.  For each row the related approvals, the two
    customisation sequences, the latest option step, the latest resources
    row, the generator parameters and the comments are fetched and attached
    before the object is yielded.

    Rows are read and updated by column alias (``req['prepid']``), so the
    cursor has to return mutable mapping-like rows.

    Args:
        campid: campaign identifier understood by ``get_campaign_key``.

    Yields:
        ``request`` objects, in the order returned by the database.
    """
    key = get_campaign_key(campid)
    q1 = '''select Request.PrKeyPF as id, Request.priority, Request.code as prepid, Request.authorName as author_name, Request.authorCMSid as author_cmsid, Request.authorInstCode as author_inst_code,
                Request.pwg, Request.status, Request.statusFlow as status_flow, Request.validation, Request.type, Request.swrelease as cmssw_release, Request.inputFileName as input_filename, Request.dataTier as data_tier, 
                Request.eventContent as event_content, Request.genFragment as gen_fragment, Request.dataSetName as dataset_name, Request.pileupDatasetName as pileup_dataset_name, Request.www,
                Request.processStr as process_string, Request.inputBlock as input_block, Request.preSteps as pre_steps, Request.cvsTag as cvs_tag, Request.inputCMSgen as input_cms_gen, Request.PVTflag as pvt_flag,
                Request.PVTcomment as pvt_comment, Request.conditions, Request.generators, Request.pileupScenario as pileup_scenario, Request.datamixerScenario as datamixer_scenario, Request.MCDBid as mcdb_id,
                Request.notes, Request.description, Request.remarks, Request.approvals, Request.runRange as run_range, Request.ALCA as alca, Request.SKIM as skim, Request.SKIMinput as skim_input, Request.cmsGEn as cms_gen,
                Request.cmsGENfile as cms_gen_file, requestDate as submission_date
                from Request where campaignKey=''' + str(key) + ';'
    # The remaining queries are templates: the request code (q2, q3) or the
    # request key (q4-q8) is appended inside the loop below.
    q2 = '''select customizeName1 as customize_name, customizeFunction1 as customize_function, sequence1 as sequence, kcustomizeName1 as kcustomize_name,
                kcustomizeFunction1 as kcustomize_function, ksequence1 as ksequence from Request where code='''
    q3 = '''select customizeName2 as customize_name, customizeFunction2 as customize_function, sequence2 as sequence, kcustomizeName2 as kcustomize_name,
                kcustomizeFunction2 as kcustomize_function, ksequence2 as ksequence from Request where code='''
    q4 = '''select step from RequestOptions where forRequest='''
    q5 = '''select nbEvents as total_events, nbEventsCompleted as completed_events, timeEvent as time_event, sizeEvent as size_event, TP as tp, unit 
                from Resources where forRequest='''
    # This template used to end in a hard-coded "forRequest=24445"; with the
    # key appended, that queried a wrong id for every request.
    q6 = '''select version, ptMax as pt_max, ptMin as pt_min, ptHatMax as pt_hat_max, ptHatMin as pt_hat_min, sHatMax as s_hat_max, sHatMin as s_hat_min, mInvMin as m_inv_min,
                mInvMax as m_inv_max,crossSection as cross_section, filterEff as filter_efficiency, filterEffError as filter_efficiency_error, matchEff as match_efficiency,
                updateDate as submission_date, updaterCMSid as author_cmsid, updaterName as author_name, updaterInstCode as author_inst_code, updaterProject as author_project
                from Resources where forRequest='''
    q7 = '''select authorCMSid as author_cmsid, authorName as author_name, authorInstCode as author_inst_code, commentDate as submission_date, body as message
                from Comment where forKey='''
    q8 = '''select cmsid as author_cmsid, name as author_name, instCode as author_inst_code, project as approval_step, approvalDate as submission_date
                from ApprovalStep where status="OK" and forKey='''

    cursor.execute(q1)
    request_rows = cursor.fetchall()
    for req in request_rows:
        # Both values are appended to SQL text; they come from the database
        # itself (Request.code / Request.PrKeyPF), not from the caller.
        prepid = '\'' + req['prepid'] + '\';'
        req_key = str(req['id']) + ';'
        sequences = []
        approvals = []
        gen_params = []
        comments = []

        # get approvals
        cursor.execute(q8 + req_key)
        for app in cursor.fetchall():
            appro = approval(app['author_name'], app['author_cmsid'], app['author_inst_code'])
            subby = appro.get_attribute('approver')
            subby['submission_date'] = convert_date(app['submission_date'])
            appro.set_attribute('approver', subby)
            approvals.append(appro.build(app['approval_step']))

        # custs: sequence 1 (q2) and sequence 2 (q3) are built the same way
        for seq_query in (q2, q3):
            cursor.execute(seq_query + prepid)
            seq_row = cursor.fetchone()
            custname = []
            custfunc = []
            sequ = ''
            # NOTE(review): if rows are keyed by the SELECT aliases
            # (customize_name, ...), the camelCase tests below never match,
            # and 'sequence' also matches 'ksequence' -- confirm the key
            # format returned by the cursor.
            for assoc_key in seq_row:
                if 'customizeName' in assoc_key:
                    if seq_row[assoc_key]:
                        custname.append(seq_row[assoc_key])
                if 'customizeFunction' in assoc_key:
                    if seq_row[assoc_key]:
                        custfunc.append(seq_row[assoc_key])
                if 'sequence' in assoc_key:
                    sequ = seq_row[assoc_key]
            # NOTE(review): both sequences are stored with index 1, as in
            # the original code -- confirm whether the second should be 2.
            sequences.append({
                'index': 1,
                'customize_name': custname,
                'customize_function': custfunc,
                'sequence': sequ,
            })

        # options
        cursor.execute(q4 + req_key)
        ops = cursor.fetchall()
        req['step'] = ops[-1]['step']  # get latest

        # resources (main): copy every column of the latest row into req
        cursor.execute(q5 + req_key)
        ress = cursor.fetchall()
        res = ress[-1]  # get latest
        for assoc_key in res:
            req[assoc_key] = res[assoc_key]

        # gen parameters
        cursor.execute(q6 + req_key)
        for gen in cursor.fetchall():
            # Fall back to the request's author when the row has none.
            if not gen['author_name']:
                gen['author_name'] = req['author_name']
            s = submission_details().build(gen['author_name'], gen['author_cmsid'], gen['author_inst_code'], gen['author_project'])
            s['submission_date'] = gen['submission_date']
            gen['submission_details'] = s
            g = generator_parameters(gen['author_name'])
            for assoc_key in gen:
                try:
                    g.set_attribute(assoc_key, gen[assoc_key])
                except Exception:
                    # Best effort: columns the object rejects are skipped.
                    continue
            gen_params.append(g.json())

        # get comments
        cursor.execute(q7 + req_key)
        for comm in cursor.fetchall():
            c = comment(comm['author_name'], comm['author_cmsid'], comm['author_inst_code']).build(comm['message'])
            temp = c['submission_details']
            temp['submission_date'] = convert_date(comm['submission_date'])
            c['submission_details'] = temp
            comments.append(c)

        # build request
        rt = request(req['author_name'], req['author_cmsid'], req['author_inst_code'])
        s = rt.get_attribute('submission_details')
        # submission_date is "<date> <time>"; the parts are named so that
        # they do not shadow the ``time`` module.
        date_part, time_part = req['submission_date'].rsplit(' ')
        s['submission_date'] = convert_date(date_part, time_part)
        rt.set_attribute('submission_details', s)
        rt.set_attribute('approvals', approvals)
        rt.set_attribute('sequences', sequences)
        rt.set_attribute('generator_parameters', gen_params)
        rt.set_attribute('comments', comments)

        for assoc_key in req:
            # The raw 'approvals' column must not overwrite the list built above.
            if assoc_key == 'approvals':
                continue
            try:
                rt.set_attribute(assoc_key, req[assoc_key])
            except Exception:
                # Best effort: columns the object rejects are skipped.
                continue

        yield rt
Exemple #15
0
def get_campaign(campid):
    """Load one campaign and return it as a populated ``campaign`` object.

    Reads the ``Campaign`` row whose id is ``campid`` through the
    module-level ``cursor``, together with its comments, its two
    customisation sequences and its approved approval steps (the latter are
    looked up by the key returned from ``get_campaign_key``).  Every column
    of the row is then copied onto the ``campaign`` object; start and end
    dates go through ``convert_date``.

    Args:
        campid: campaign id, used verbatim inside the SQL text.

    Returns:
        The ``campaign`` object, with its ``id`` attribute set to the
        campaign key.
    """
    key = get_campaign_key(campid)
    q1 = '''select id as prepid, authorName as author_name,startDate as start_date, endDate as end_date, energy, type, prodType as production_type, 
    reprType as repr_type, swrelease as cmssw_release, description, remarks, validation, pileupDatasetName as pileup_dataset_name, 
    processStr as process_string, conditions, generators, pileupScenario as pileup_scenario, datamixerScenario as datamixer_scenario, inputFileName as input_filename, 
    www, preSteps as pre_steps, dataTier as data_tier, eventContent as event_content, nbEvt as total_events, nbEvtCompleted as completed_events, approvals, 
    authorCMSid as author_cmsid, authorInstCode as author_inst_code
    from Campaign where id=''' + '\'' + str(campid) + '\';'

    q2 = '''select sequence1 as sequence,  customizeName1 as customize_name,  customizeFunction1 as customize_function from Campaign where id=''' + '\'' + str(campid) + '\';'
    q3  = '''select sequence2 as sequence,  customizeName2 as customize_name,  customizeFunction2 as customize_function from Campaign where id=''' + '\'' + str(campid) + '\';'
    q4 = '''select authorCMSid as author_cmsid, authorName as author_name, authorInstCode as author_inst_code, commentDate as submission_date, body as message
                from Comment where forKey=''' + str(key) + ';'
    q5 = '''select cmsid as author_cmsid, name as author_name, instCode as author_inst_code, project as approval_step, approvalDate as submission_date
                from ApprovalStep where status="OK" and forKey=''' + str(key) + ';'
    approvals = []
    sequences = []
    comments = []

    # get campaign
    cursor.execute(q1)
    camp_json = cursor.fetchone()

    # get comments
    cursor.execute(q4)
    for comm in cursor.fetchall():
        c = comment(comm['author_name'], comm['author_cmsid'], comm['author_inst_code']).build(comm['message'])
        temp = c['submission_details']
        # submission_date is "<date> <time>"; the parts are named so that
        # they do not shadow the ``time`` module.
        date_part, time_part = comm['submission_date'].rsplit(' ')
        temp['submission_date'] = convert_date(date_part, time_part)
        c['submission_details'] = temp
        comments.append(c)

    # custs: sequence 1 (q2) and sequence 2 (q3) are built the same way
    for seq_query in (q2, q3):
        cursor.execute(seq_query)
        seq_row = cursor.fetchone()
        custname = []
        custfunc = []
        sequ = ''
        # NOTE(review): if rows are keyed by the SELECT aliases
        # (customize_name, ...), the camelCase tests below never match --
        # confirm the key format returned by the cursor.
        for assoc_key in seq_row:
            if 'customizeName' in assoc_key:
                if seq_row[assoc_key]:
                    custname.append(seq_row[assoc_key])
            if 'customizeFunction' in assoc_key:
                if seq_row[assoc_key]:
                    custfunc.append(seq_row[assoc_key])
            if 'sequence' in assoc_key:
                sequ = seq_row[assoc_key]
        # NOTE(review): both sequences are stored with index 1, as in the
        # original code -- confirm whether the second should be 2.
        sequences.append({
            'index': 1,
            'customize_name': custname,
            'customize_function': custfunc,
            'sequence': sequ,
        })

    # get approvals
    allowed = ['SIM',  'HLT',  'L1',  'ALCA',  'RECO',  'Start'] # campaign hack
    cursor.execute(q5)
    for app in cursor.fetchall():
        appro = approval(app['author_name'], app['author_cmsid'], app['author_inst_code'])
        subby = appro.get_attribute('approver')
        subby['submission_date'] = convert_date(app['submission_date'])
        appro.set_attribute('approver', subby)
        # Steps missing from the default list are added so build() accepts them.
        if app['approval_step'] not in allowed:
            allowed.append(app['approval_step'])
        appro.set_approval_steps(allowed)
        approvals.append(appro.build(app['approval_step']))

    camp_json['approvals'] = approvals
    camp_json['sequences'] = sequences
    camp_json['comments'] = comments
    campy = campaign(camp_json['author_name'], camp_json['author_cmsid'], camp_json['author_inst_code'])

    # The loop variable must not be called ``key``: that used to overwrite
    # the campaign key, so 'id' below was set to a column name instead.
    for attr in camp_json:
        try:
            if attr == 'start_date' or attr == 'end_date':
                campy.set_attribute(attr, convert_date(camp_json[attr]))
                continue
            campy.set_attribute(attr, camp_json[attr])
        except Exception:
            # Best effort: columns the object rejects are skipped.
            continue

    campy.set_attribute('id', key)
    return campy
    def get_comments_by_film(self, film_id):
        """Scrape the comments of a Douban film and store the new ones.

        A first request reads the total number of comments from the
        ``CommentTabs`` header; the total is capped at ``util.COMMENT_MAX``.
        The comment pages are then fetched 20 entries at a time.  For every
        ``comment-item`` a ``comment`` object is filled in, its author is
        looked up (and created through ``user.user`` when unknown) and the
        comment is saved unless its id is already stored.  Database errors
        are logged and skipped.  Returns ``None``.
        """
        comments_url = ('https://movie.douban.com/subject/' + str(film_id) +
                        '/comments')

        # First request: only used to find out how many comments exist.
        probe_params = {
            'start': '0',
            'limit': '20',
            'status': 'P',
            'sort': 'new_score'
        }
        r = proxy.gethtml(comments_url, self.headers, probe_params)
        if r is None:
            return
        soup = BeautifulSoup(r.content.decode(), 'html.parser')
        if soup is None:
            return

        tot = 0
        cmt_tab = soup.find('ul', {'class': 'fleft CommentTabs'})
        cmt_tab_span = cmt_tab.find('span') if cmt_tab else None
        if cmt_tab_span:
            count_match = re.search(r'(\d+)', cmt_tab_span.get_text())
            if count_match:
                tot = int(count_match.group(1))
        # Cap the number of comments so the crawl does not become too large
        # to finish.
        tot = min(tot, util.COMMENT_MAX)

        for page in range(0, int(tot / 20) + 1):
            page_params = {
                'start': str(page * 20),
                'limit': '20',
                'status': 'P',
                'sort': 'new_score'
            }
            r = proxy.gethtml(url=comments_url,
                              params=page_params,
                              headers=self.headers)
            if r is None:
                continue
            soup = BeautifulSoup(r.content.decode(), 'html.parser')
            if soup is None:
                continue

            items = soup.find_all('div', attrs={'class': 'comment-item'})
            if items is None:
                continue
            for item in items:
                c = comment.comment()
                c.comment_id = item.get('data-cid')

                # <span class="votes vote-count">1042</span>
                votes = item.find('span', attrs={'class': 'votes vote-count'})
                if votes:
                    c.comment_useful = votes.get_text()

                info = item.find('span', {'class': 'comment-info'})
                author_link = info.find('a') if info else None
                if author_link:
                    c.user_name = author_link.get_text()
                    c.user_url = author_link.get('href')

                # The user id is the last path segment of the profile URL.
                url_parts = re.sub(r'\/$', '', c.user_url).split('/')
                if len(url_parts) > 1:
                    c.user_id = url_parts[-1]

                # Unknown author: fetch the profile and store it.
                if len(sql.get_user_byid(c.user_id)) == 0:
                    new_user = user.user()
                    new_user.user_id = c.user_id
                    new_user.user_name = c.user_name
                    new_user.user_url = c.user_url

                    new_user.get_user_info(new_user.user_url)
                    try:
                        # Checked again after get_user_info() has run.
                        if len(sql.get_user_byid(new_user.user_id)) == 0:
                            sql.save_user(new_user)
                        if new_user.visible == '0':
                            sql.update_user_spider(new_user.user_id)
                    except Exception as e:
                        log.logger.info(str(e))

                # <span title="力荐" class="allstar50 rating"></span>
                if info:
                    star = info.find('span', {'class': re.compile('allstar')})
                    if star:
                        star_class = star.get('class')[0]
                        c.star = str(
                            int(star_class.replace('allstar', '')) / 10)

                    posted = info.find('span', {'class': 'comment-time'})
                    if posted:
                        c.comment_time = posted.get_text().strip()

                content = item.find('p', {'class': 'comment-content'})
                content_span = content.find('span') if content else None
                if content_span:
                    c.comment_content = content_span.get_text()

                c.film_id = film_id
                try:
                    # Save only comments that are not stored yet.
                    if len(sql.get_comment_byid(c.comment_id)) == 0:
                        sql.save_comment(c)
                except Exception as e:
                    log.logger.info("cid:" + str(c.comment_id))
                    log.logger.info(str(e))

def menu():
    """Print the numbered main menu, followed by a blank line, to stdout."""
    # Single-argument print() calls give the same output on Python 2 and 3;
    # the former print statements were a syntax error on Python 3.
    menu_lines = (
        " MENU",
        "",
        " 1. Train",
        " 2. Test",
        " 3. Load Data",
        " 4. Record Data",
        " 5. Check Comment",
        " 6. Exit",
        "",
    )
    for line in menu_lines:
        print(line)

if __name__ == '__main__':

    com = comment.comment()
    header()

    while 1:
        menu()
        number = int(raw_input('Enter your input Number : '))
        if number == 1:
            print ""
            print "TRAINING"
            print ""
            plst = raw_input('Enter the file name of positive comments list:')
            nlst = raw_input('Enter the file name of negative comments list:')
            print ""
            print "Training is started ... "
            print ""
            if com.positive_input(plst) == 1:
Exemple #18
0
import comment

if __name__ == "__main__":
    # Script entry point: build a comment object for a hard-coded id and
    # call its get_comment() method.
    # Alternative id kept from an earlier run:
    #c=comment.comment(2384387)
    c = comment.comment(4609870)
    c.get_comment()