Example #1
0
def contests():
    """
        Show the upcoming contests

        Fetches ongoing and upcoming contests from contesttrackerapi and
        builds an HTML table (web2py helpers) with one row per contest.

        Returns:
            dict: {"table": TABLE, "upcoming": list} on success,
                  an empty dict if the API request fails.
    """

    url = "https://contesttrackerapi.herokuapp.com/"
    response = requests.get(url)
    if response.status_code != 200:
        # @todo: something better
        return dict()
    response = response.json()["result"]

    ongoing = response["ongoing"]
    upcoming = response["upcoming"]

    def _link_td(contest_url):
        # TD holding the external-link button to the contest page;
        # shared by the ongoing and upcoming rows.
        return TD(
            A(I(_class="fa fa-external-link-square fa-lg"),
              _class="btn-floating btn-small green accent-4 tooltipped",
              _href=contest_url,
              data={
                  "position": "left",
                  "tooltip": "Contest Link",
                  "delay": "50"
              },
              _target="_blank"))

    table = TABLE(_class="centered striped")
    thead = THEAD(
        TR(TH("Contest Name"), TH("Site"), TH("Start"), TH("Duration/Ending"),
           TH("Link"), TH("Add Reminder")))
    table.append(thead)
    tbody = TBODY()

    for i in ongoing:

        # These platforms are intentionally not supported.
        if i["Platform"] in ("TOPCODER", "OTHER"):
            continue

        try:
            endtime = datetime.datetime.strptime(i["EndTime"],
                                                 "%a, %d %b %Y %H:%M")
        except ValueError:
            # Skip contests whose end time can't be parsed.
            continue
        tr = TR()
        # Green dot marking the contest as live.
        span = SPAN(_class="green tooltipped",
                    data={"position": "right",
                          "delay": "50",
                          "tooltip": "Live Contest"},
                    _style="cursor: pointer; " + \
                            "float:right; " + \
                            "height:10px; " + \
                            "width:10px; " + \
                            "border-radius: 50%;")
        tr.append(TD(i["Name"], span))
        tr.append(TD(i["Platform"].capitalize()))
        tr.append(TD("-"))
        tr.append(TD(str(endtime).replace("-", "/"),
                     _class="contest-end-time"))
        tr.append(_link_td(i["url"]))
        tr.append(
            TD(
                BUTTON(
                    I(_class="fa fa-calendar-plus-o"),
                    _class=
                    "btn-floating btn-small orange accent-4 tooltipped disabled",
                    data={
                        "position": "left",
                        "tooltip": "Already started!",
                        "delay": "50"
                    })))
        tbody.append(tr)

    # This id is used for uniquely identifying
    # a particular contest in js
    button_id = 1
    for i in upcoming:

        if i["Platform"] in ("TOPCODER", "OTHER"):
            continue

        try:
            start_time = datetime.datetime.strptime(i["StartTime"],
                                                    "%a, %d %b %Y %H:%M")
        except ValueError:
            # Consistent with the ongoing loop: skip unparseable dates
            # instead of failing the whole page.
            continue
        tr = TR(_id="contest-" + str(button_id))
        tr.append(TD(i["Name"]))
        tr.append(TD(i["Platform"].capitalize()))
        tr.append(TD(str(start_time)))

        # Abbreviate " days"/" day" to "d" for compact display.
        duration = i["Duration"]
        duration = duration.replace(" days", "d")
        duration = duration.replace(" day", "d")
        tr.append(TD(duration))
        tr.append(_link_td(i["url"]))
        tr.append(
            TD(
                BUTTON(
                    I(_class="fa fa-calendar-plus-o"),
                    _class="btn-floating btn-small orange accent-4 tooltipped",
                    data={
                        "position": "left",
                        "tooltip": "Set Reminder to Google Calendar",
                        "delay": "50"
                    },
                    _id="set-reminder-" + str(button_id))))
        tbody.append(tr)
        button_id += 1

    table.append(tbody)
    return dict(table=table, upcoming=upcoming)
Example #2
0
 def __init__(self):
     """Create the database handle and the natural-language date parser."""
     # Persistent storage backend used by this object.
     self._db = DatabaseManager()
     # parsedatetime calendar configured with the class-level LOCALE
     # (assumes self.LOCALE is a parsedatetime-supported locale id —
     # TODO confirm against the class definition).
     self._pdt = parsedatetime.Calendar(parsedatetime.Constants(
         self.LOCALE))
Example #3
0
def conv_dt(dt_string):
    """Convert a natural-language date string into a datetime object."""
    calendar = parsedatetime.Calendar()
    parsed, _status = calendar.parseDT(datetimeString=dt_string)
    return parsed
Example #4
0
def process_message(config, data, event, context):
    """Handle one Pub/Sub message end to end.

    Enforces the retry window, runs configured input processors to build
    template variables, evaluates the optional top-level ``processIf``
    condition, deduplicates resends via a GCS bucket, and finally
    dispatches the message to every configured output.

    Args:
        config: parsed configuration dict.
        data: decoded message payload.
        event: raw Pub/Sub event.
        context: function context (provides timestamp and event_id).

    Raises:
        MessageTooOldException: message is older than the retry period.
        NoResendConfigException: resendBucket set without resendPeriod.
        NoTypeConfiguredException: an output entry has no type.
        NoOutputsConfiguredException: no outputs configured at all.
    """
    logger = logging.getLogger('pubsub2inbox')

    # Ignore messages submitted before our retry period
    retry_period = '2 days ago'
    if 'retryPeriod' in config:
        retry_period = config['retryPeriod']
    retry_period_parsed = parsedatetime.Calendar().parse(retry_period)
    if len(retry_period_parsed) > 1:
        retry_earliest = datetime.fromtimestamp(mktime(retry_period_parsed[0]),
                                                timezone.utc)
    else:
        retry_earliest = datetime.fromtimestamp(mktime(retry_period_parsed),
                                                timezone.utc)
    message_time = parser.parse(context.timestamp)
    if (message_time - retry_earliest) < timedelta(0, 0):
        logger.warning('Ignoring message because it\'s past the retry period.',
                       extra={
                           'event_id': context.event_id,
                           'retry_period': retry_period,
                           'retry_earliest': retry_earliest.strftime('%c'),
                           'event_timestamp': message_time
                       })
        raise MessageTooOldException(
            'Ignoring message because it\'s past the retry period.')

    template_variables = {
        'data': data,
        'event': event,
        'context': context,
    }

    jinja_environment = get_jinja_environment()
    if 'processors' in config:
        for processor in config['processors']:
            logger.debug('Processing message using input processor: %s' %
                         processor)
            # Dynamic import: processors.<name>.<Name>Processor
            mod = __import__('processors.%s' % processor)
            processor_module = getattr(mod, processor)
            processor_class = getattr(processor_module,
                                      '%sProcessor' % processor.capitalize())
            processor_instance = processor_class(config, jinja_environment,
                                                 data, event, context)
            processor_variables = processor_instance.process()
            template_variables.update(processor_variables)

    # Expose all collected variables as Jinja globals for later templates.
    jinja_environment.globals = {
        **jinja_environment.globals,
        **template_variables
    }

    if 'processIf' in config:
        processif_template = jinja_environment.from_string(config['processIf'])
        processif_template.name = 'processif'
        processif_contents = processif_template.render()
        if processif_contents.strip() == '':
            logger.info(
                'Will not send message because processIf evaluated to empty.')
            return

    if 'resendBucket' in config:
        if 'resendPeriod' not in config:
            raise NoResendConfigException(
                'No resendPeriod configured, even though resendBucket is set!')

        # The resend key identifies "the same message" for dedup purposes:
        # either a user-supplied template, or the full variable set minus
        # the (always-unique) context.
        resend_key_hash = hashlib.sha256()
        if 'resendKey' not in config:
            default_resend_key = template_variables.copy()
            default_resend_key.pop('context')
            resend_key_hash.update(
                json.dumps(default_resend_key).encode('utf-8'))
        else:
            key_template = jinja_environment.from_string(config['resendKey'])
            key_template.name = 'resend'
            key_contents = key_template.render()
            resend_key_hash.update(key_contents.encode('utf-8'))

        resend_file = resend_key_hash.hexdigest()
        logger.debug('Checking for resend object in bucket...',
                     extra={
                         'bucket': config['resendBucket'],
                         'blob': resend_file
                     })
        client_info = grpc_client_info.ClientInfo(
            user_agent='google-pso-tool/pubsub2inbox/1.1.0')

        storage_client = storage.Client(client_info=client_info)
        bucket = storage_client.bucket(config['resendBucket'])
        resend_blob = bucket.blob(resend_file)
        if resend_blob.exists():
            resend_blob.reload()
            resend_period = config['resendPeriod']
            # Earliest permissible resend time, relative to when the marker
            # blob was created.
            resend_period_parsed = parsedatetime.Calendar().parse(
                resend_period, sourceTime=resend_blob.time_created)
            if len(resend_period_parsed) > 1:
                resend_earliest = datetime.fromtimestamp(
                    mktime(resend_period_parsed[0]))
            else:
                resend_earliest = datetime.fromtimestamp(
                    mktime(resend_period_parsed))

            if datetime.now() >= resend_earliest:
                logger.debug('Resending the message now.',
                             extra={
                                 'resend_earliest': resend_earliest,
                                 'blob_time_created': resend_blob.time_created
                             })
                # Touch the marker to restart the resend period.
                resend_blob.upload_from_string('')
            else:
                logger.info(
                    'Can\'t resend the message now, resend period not elapsed.',
                    extra={
                        'resend_earliest': resend_earliest,
                        'blob_time_created': resend_blob.time_created
                    })
                return
        else:
            try:
                # if_generation_match=0 makes the create atomic: it fails
                # if the blob appeared since the exists() check.
                resend_blob.upload_from_string('', if_generation_match=0)
            except Exception as exc:
                # Handle TOCTOU condition
                if 'conditionNotMet' in str(exc):
                    logger.warning(
                        'Message (re)sending already in progress (resend key already exist).',
                        extra={'exception': exc})
                    return
                else:
                    raise exc

    if 'outputs' in config:
        for output_config in config['outputs']:
            if 'type' not in output_config:
                raise NoTypeConfiguredException(
                    'No type configured for output!')

            if 'processIf' in output_config:
                processif_template = jinja_environment.from_string(
                    output_config['processIf'])
                processif_template.name = 'processif'
                processif_contents = processif_template.render()
                if processif_contents.strip() == '':
                    logger.info(
                        'Will not use output processor %s because processIf evaluated to empty.'
                        % output_config['type'])
                    # Bug fix: this `continue` was previously OUTSIDE the
                    # enclosing if, which skipped every output that defined
                    # a processIf condition regardless of its result.
                    continue

            logger.debug('Processing message using output processor: %s' %
                         output_config['type'])

            # Dynamic import: output.<type>.<Type>Output
            output_type = output_config['type']
            mod = __import__('output.%s' % output_type)
            output_module = getattr(mod, output_type)
            output_class = getattr(output_module,
                                   '%sOutput' % output_type.capitalize())
            output_instance = output_class(config, output_config,
                                           jinja_environment, data, event,
                                           context)
            try:
                output_instance.output()
            except Exception as exc:
                logger.error('Output processor %s failed, trying next...' %
                             (output_type),
                             extra={'exception': traceback.format_exc()})
                if 'allOutputsMustSucceed' in config and config[
                        'allOutputsMustSucceed']:
                    raise exc

    else:
        raise NoOutputsConfiguredException('No outputs configured!')
    def __init__(self, date_format=None, **kwargs):
        """Initialize the date field.

        Args:
            date_format: optional explicit date format string; when None,
                values are presumably parsed with the natural-language
                parser below — TODO confirm in the parsing method.
            **kwargs: forwarded unchanged to the parent class.
        """
        super(Date, self).__init__(**kwargs)

        self.date_format = date_format
        # Calendar with context-style results (parse calls report what
        # components — date and/or time — were actually matched).
        self.parser = parsedatetime.Calendar(
            version=parsedatetime.VERSION_CONTEXT_STYLE)
Example #6
0
    def __call__(self, args):
        """Render sample visualizations of each worker's completed jobs.

        For every worker, selects up to args.number random jobs (excluding
        training videos, optionally restricted to HITs after args.since),
        samples args.frames frames per job, tiles them into a roughly
        square banner image, and writes one JPEG per job into
        args.directory. Python 2 code (print statements, xrange).
        """
        try:
            os.makedirs(args.directory)
        except:
            # Directory already exists (or can't be created) — best effort.
            pass

        # Optional lower bound on HIT submission time, parsed from a
        # natural-language string such as "2 weeks ago".
        since = None
        if args.since:
            since = parsedatetime.Calendar().parse(args.since)
            since = time.mktime(since[0])
            since = datetime.datetime.fromtimestamp(since)

        if args.labels:
            font = ImageFont.truetype("arial.ttf", 14)
        else:
            font = None

        workers = session.query(turkic.models.Worker)
        for worker in workers:
            print "Sampling worker {0}".format(worker.id)

            # Random sample of this worker's jobs, excluding training videos.
            jobs = session.query(Job)
            jobs = jobs.filter(Job.worker == worker)
            jobs = jobs.join(Segment)
            jobs = jobs.join(Video)
            jobs = jobs.filter(Video.isfortraining == False)

            if since:
                jobs = jobs.filter(turkic.models.HIT.timeonserver >= since)

            jobs = jobs.order_by(sqlalchemy.func.rand())
            jobs = jobs.limit(args.number)

            for job in jobs:
                print "Visualizing HIT {0}".format(job.hitid)
                paths = [x.getboxes(interpolate = True,
                                    bind = True,
                                    label = True) for x in job.paths]

                # Pick the frames to show: all of them if the segment is
                # short, otherwise a random sample of args.frames.
                if args.frames > job.segment.stop - job.segment.start:
                    frames = range(job.segment.start, job.segment.stop + 1)
                else:
                    frames = random.sample(xrange(job.segment.start,
                                                job.segment.stop + 1),
                                           args.frames)

                # Tile the sampled frames into a near-square grid:
                # floor(sqrt(n)) columns, ceil(sqrt(n)) rows.
                size = math.sqrt(len(frames))
                video = job.segment.video
                bannersize = (video.width * int(math.floor(size)),
                              video.height * int(math.ceil(size)))
                image = Image.new(video[0].mode, bannersize)
                size = int(math.floor(size))

                # (x, y) paste position and column counter for the grid.
                offset = (0, 0)
                horcount = 0

                paths = vision.visualize.highlight_paths(video, paths,
                                                         font = font)
                for frame, framenum in paths:
                    if framenum in frames:
                        image.paste(frame, offset)
                        horcount += 1
                        if horcount >= size:
                            # Row full: wrap to the start of the next row.
                            offset = (0, offset[1] + video.height)
                            horcount = 0
                        else:
                            offset = (offset[0] + video.width, offset[1])

                image.save("{0}/{1}-{2}.jpg".format(args.directory,
                                                    worker.id,
                                                    job.hitid))
Example #7
0
# limitations under the License.
"""For processing BigQuery timestamp dates."""

import datetime
import logging
import re

import parsedatetime

# Turn off the chatty parsedatetime module's logging
logging.getLogger('parsedatetime').setLevel(logging.ERROR)

# Initialize parsedatetime
pdt_constants = parsedatetime.Constants()
# BirthdayEpoch controls how two-digit years are expanded (values below
# the epoch become 20xx, at or above become 19xx — per parsedatetime docs).
pdt_constants.BirthdayEpoch = 50  # TODO(user) provide a way to set this.
pdt = parsedatetime.Calendar(pdt_constants)

# Matches ISO-like timestamps with either a space or 'T' separator,
# optional 3-4 digit fractional seconds, and an optional UTC offset.
# e.g. 1989-10-02 05:23:48 1958-06-24T12:18:35.5803 1988-08-15T19:06:56.235
TIMESTAMP_RE = re.compile(r'^\d{4}-(0[1-9]|1[0-2])-(0[1-9]|[12]\d|3[01])'
                          r'[ T]([01]\d|2[0-3]):[0-5]\d'
                          r':([0-5]\d|60)(\.\d{3,4})?'  # Leap seconds!
                          r'( [+-][012]\d:[0-5]\d)?$')

# YYYY-MM-DD HH:MM:SS.micro +08:00
OUTPUT_TIMESTAMP_FORMAT = '%Y-%m-%d %H:%M:%S.%f %z'

INPUT_TIMESTAMP_FORMATS = (
    '%Y-%m-%d %H:%M:%S.%f %z',
    '%Y/%m/%d %H:%M:%S.%f %z',
    '%m/%d/%Y %H:%M:%S.%f %z',
    '%m/%d/%y %H:%M:%S.%f %z',
Example #8
0
 async def mute(self, ctx, *, member = None, cooldown = None):
     """Mencegah member untuk mengirim pesan dalam chat atau berbicara dalam voice channel(bot-admin/admin only)."""
     # isOwner = self.settings.isOwner(ctx.author)
     # if isOwner == False:
     #     await ctx.send("Fitur ini sedang dalam tahap ujicoba")
     #     return
     if not await Utils.is_bot_admin_reply(ctx): return
     if member == None:
         em = discord.Embed(color = 0XFF8C00, description = "Mencegah member untuk mengirim pesan dalam chat atau berbicara dalam voice channel\n\n"
                                                            "**Panduan**\n"
                                                            "`{}mute [member] [cooldown]`"
                                                            .format(ctx.prefix))
         em.set_footer(text = "{}#{}".format(ctx.author.name, ctx.author.discriminator), icon_url = f"{ctx.author.avatar_url}")
         return await ctx.send(embed=em)
     # Let's search for a name at the beginning - and a time at the end
     parts = member.split()
     for j in range(len(parts)):
         # Reverse search direction
         i = len(parts)-1-j
         memFromName = None
         endTime     = None
         # Name = 0 up to i joined by space
         nameStr = ' '.join(parts[0:i+1])
         # Time = end of name -> end of parts joined by space
         timeStr = ' '.join(parts[i+1:])
         memFromName = DisplayName.memberForName(nameStr, ctx.guild)
         if memFromName:
             # We got a member - let's check for time
             # Get current time - and end time
             try:
                 # Get current time - and end time
                 currentTime = int(time.time())
                 cal         = parsedatetime.Calendar()
                 time_struct, parse_status = cal.parse(timeStr)
                 start       = datetime(*time_struct[:6])
                 end         = time.mktime(start.timetuple())
                 # Get the time from now to end time
                 endTime = end-currentTime
             except:
                 pass
             if not endTime == None:
                 # We got a member and a time - break
                 break
     if memFromName == None:
         # We couldn't find one or the other
         em = discord.Embed(color = 0XFF8C00, description = "> Mencegah member untuk mengirim pesan dalam chat atau berbicara dalam voice channel\n> \n"
                                                            "> **Panduan**\n"
                                                            "> `{}mute [member] [cooldown]`"
                                                            .format(ctx.prefix))
         em.set_author(name = "Oops!", url = "https://acinonyxesports.com/", icon_url = "https://cdn.discordapp.com/attachments/518118753226063887/725569194304733435/photo.jpg")
         em.set_footer(text = f"Request By : {ctx.author.name}", icon_url = f"{ctx.author.avatar_url}")
         msg = 'Usage: `{}mute [member] [cooldown]`'.format(ctx.prefix)
         return await ctx.send(msg)
     cooldown = None if endTime == 0 else endTime
     member   = memFromName
     # Check if we're muting ourself
     if member is ctx.author:
         msg = 'Akan lebih mudah untuk ku kalo kamu sendiri yang diam!'
         em = discord.Embed(color = 0XFF8C00, description = msg)
         em.set_footer(text = "{}".format(ctx.author),
                       icon_url = f"{ctx.author.avatar_url}")
         return await ctx.send(embed = em)
     # Check if we're muting the bot
     if member.id == self.bot.user.id:
         msg = '┐( ̄ヘ ̄;)┌\nAku nggk mau mute diri sendiri.'
         em = discord.Embed(color = 0XFF8C00, description = msg)
         em.set_footer(text = "{}".format(ctx.author),
                       icon_url = f"{ctx.author.avatar_url}")
         return await ctx.send(embed = em)
     # Check if member is admin or bot admin
     if await Utils.is_bot_admin_reply(ctx,member=member,message="┐( ̄ヘ ̄;)┌\nKamu tidak dapat melakukan mute pada admin lain.",message_when=True): return
     # Set cooldown - or clear it
     if type(cooldown) is int or type(cooldown) is float:
         if cooldown < 0:
             msg = '┐( ̄ヘ ̄;)┌\nCooldown tidak dapat dilakukan dengan angka negatif!'
             em = discord.Embed(color = 0XFF8C00, description = msg)
             em.set_footer(text = "{}#{}".format(ctx.author),
                           icon_url = f"{ctx.author.avatar_url}")
             return await ctx.send(embed = em)
         currentTime = int(time.time())
         cooldownFinal = currentTime+cooldown
     else:
         cooldownFinal = None
     # Check if we're using the old mute and suggest the quicker version
     try: role = ctx.guild.get_role(int(self.settings.getServerStat(ctx.guild,"MuteRole")))
     except: role = None
     msg = "Muting...{}".format(
         "" if role else "\n\nKamu dapat membuat memilih role dengan cara mengetik `{}setmuterole [role]`\nAtau `{}createmuterole [role_name]` untuk mute member ***lebih cepat***.".format(ctx.prefix,ctx.prefix)
         )
     em = discord.Embed(color = 0XFF8C00, description = msg)
     em.set_footer(text = "{}".format(ctx.author), icon_url = "{}".format(ctx.author.avatar_url))
     mess = await ctx.send(embed = em)
     # Do the actual muting
     await self._mute(member, ctx.guild, cooldownFinal, ctx.author)
     if cooldown:
         mins = "menit"
         checkRead = ReadableTime.getReadableTimeBetween(currentTime, cooldownFinal)
         msg = '*{}* telah di **Mute** hingga *{}*.'.format(DisplayName.name(member), checkRead)
         # pm  = 'You have been **Muted** by *{}* for *{}*.\n\nYou will not be able to send messages on *{}* until either that time has passed, or you have been **Unmuted**.'.format(DisplayName.name(ctx.author), checkRead, Utils.suppressed(ctx, ctx.guild.name))
     else:
         msg = '*{}* telah di **Mute** *hingga pemberitahuan lebih lanjut*.'.format(DisplayName.name(member))
         # pm  = 'You have been **Muted** by *{}* *until further notice*.\n\nYou will not be able to send messages on *{}* until you have been **Unmuted**.'.format(DisplayName.name(ctx.author), Utils.suppressed(ctx, ctx.guild.name))
     await mess.edit(content=Utils.suppressed(ctx,msg))
     '''try:
Example #9
0
def run():
    """Scrape the first three Hacker News pages and upsert NewsItem rows.

    For each story, collects the target URL, title, HN discussion URL,
    upvote count, comment count and a parsed post timestamp, then creates
    or updates the corresponding NewsItem keyed by URL. Prints progress
    and the collected data for debugging.
    """
    from bs4 import BeautifulSoup
    import requests
    from main_site.models import NewsItem

    import parsedatetime
    from datetime import datetime

    print("-" * 100)
    print("Hackernews parser")
    print("-" * 100)

    urls = [
        "https://news.ycombinator.com/news?p=1",
        "https://news.ycombinator.com/news?p=2",
        "https://news.ycombinator.com/news?p=3"
    ]
    # Each story occupies two <tr> rows: the "athing" row (title/link)
    # and its next sibling (score/age/comments).
    tr1s = []
    tr2s = []
    for u in urls:
        res = requests.get(u)
        soup = BeautifulSoup(res.content)
        page_rows = soup.findAll('tr', {'class': 'athing'})
        tr1s += page_rows
        for tr1 in page_rows:
            tr2s.append(tr1.findNextSibling())

    url = []
    title = []
    hacker_news_url = []
    upvote_count = []
    comment_count = []
    posted_on = []
    for i in range(len(tr1s)):
        url.append(tr1s[i].select_one('td:nth-of-type(3) > a').get('href'))
        title.append(tr1s[i].select_one('td:nth-of-type(3) > a').get_text())

        # The discussion link and relative age both live in the "age"
        # span; it is missing for some rows (e.g. job postings).
        age_link = tr2s[i].select_one('td:nth-of-type(2) > span.age > a')
        if age_link is None:
            hacker_news_url.append('')
            posted_on.append('')
        else:
            hacker_news_url.append(
                "https://news.ycombinator.com/" + age_link.get('href'))
            posted_on.append(age_link.get_text())

        score = tr2s[i].select_one('td:nth-of-type(2) > span.score')
        if score is None:
            upvote_count.append('0')
        else:
            upvote_count.append(score.get_text().split()[0])

        # Comment link says "discuss" when there are no comments yet.
        comments = tr2s[i].select_one('td:nth-of-type(2) > a:nth-of-type(3)')
        if comments is None or comments.get_text() == "discuss":
            comment_count.append('0')
        else:
            comment_count.append(comments.get_text().split()[0])

    # Convert relative ages ("7 hours ago") to absolute timestamps.
    cal = parsedatetime.Calendar()
    date_db = []
    for date_str in posted_on:
        time_struct, parse_status = cal.parse(date_str)
        date_db.append(
            datetime(*time_struct[:6]).strftime('%Y-%m-%d %H:%M:%S'))

    print(posted_on)
    print(date_db)

    for i in range(len(url)):
        item, created = NewsItem.objects.get_or_create(url=url[i])
        # The same fields are written whether the item is new or existing;
        # only the progress message differs.
        print('New item was created' if created else 'updating current item')
        item.title = title[i]
        item.hacker_news_url = hacker_news_url[i]
        item.posted_on = date_db[i]
        item.comment_count = comment_count[i]
        item.upvote_count = upvote_count[i]
        item.save()

    print(url)
    print(title)
    print(hacker_news_url)
    print(upvote_count)
    print(comment_count)
    print(date_db)
Example #10
0
def parse_date(dtstring):
    """Parse a natural-language date string into a datetime.

    Returns an empty string (not None) for blank input, preserving the
    original contract.
    """
    if not dtstring.strip():
        return ""
    parsed_struct, _status = dtparser.Calendar().parse(dtstring)
    return datetime.fromtimestamp(mktime(parsed_struct))
Example #11
0
def validate_content(file, content_dir):
    """
	Validate content for a file based on the rules listed in __doc__
		:file: (str) the name of the content file to validate
		:content_dir: (str) the path to the content directory
	"""
    with open(file, 'r') as f:
        file_contents = [
            content.replace('\n', '') for content in f.readlines()
        ]

    contents = {}
    for line in file_contents:
        items = line.split(':')
        field, value = cleanse(items[0]), cleanse(':'.join(items[1:]))
        contents[field] = value

    status_code = 0
    filename = file
    field = ""
    short_message = ""
    long_message = "n/a"

    loop = True
    while (loop):
        loop = False

        # validate filetype
        field = ''
        filetype = os.path.splitext(file)[-1]
        if not filetype == '.md':
            status_code = 1
            short_message = "invalid filetype"
            long_message = f"File type `{filetype}` is not supported. Please ensure that your contribution is written in a Markdown file (`.md`)."
            break

        # validate name
        field = 'name'
        if not 'name' in contents:
            status_code = 1
            short_message = "missing required field `name`"
            break
        if contents['name'] == '':
            status_code = 1
            short_message = "empty required field `name`"
            break
        if not str(contents['name']):
            status_code = 1
            short_message = "invalid field: `name`"
            break
        if not len(contents['name']) <= 100:
            status_code = 1
            short_message = "length of field `name` exceeds 100 characters"
            break

        # validate author
        field = 'author'
        if not 'author' in contents:
            status_code = 1
            short_message = "missing required field `author`"
            break
        if contents['author'] == '':
            status_code = 1
            short_message = "empty required field `author`"
            break
        if not str(contents['author']):
            status_code = 1
            short_message = "invalid field `author`"
            break
        if not len(contents['author']) <= 100:
            status_code = 1
            short_message = "length of field `author` exceeds 100 characters"
            break

        # validate author_github
        field = 'author_github'
        author_github = contents.get('author_github', '')
        if not author_github == '':
            if not str(author_github):
                status_code = 1
                short_message = "invalid field `author_github`"
                break
            if not 'github.com' in author_github:
                status_code = 1
                short_message = "field `author_github` must be a GitHub URL"
                break
            try:
                response = requests.get(author_github, timeout=15)
                if not response:
                    status_code = 1
                    short_message = "URL provided for field `author_github` returned an error HTTP code when accessed"
                    long_message = f"The result of accessing \"{contents['author_github']}\" resulted in an HTTP response code of `{response.status_code}`, which is an error."
                    break
            except Exception as e:
                status_code = 1
                short_message = "URL provided for field `author_github` is inaccessible"
                long_message = f"Trying to access \"{contents['author_github']}\" resulted in an unknown exception, likely indicating that the URL is invalid."
                break

        # validate blurb
        field = 'blurb'
        if not 'blurb' in contents:
            status_code = 1
            short_message = "missing required field `blurb`"
            break
        if contents['blurb'] == '':
            status_code = 1
            short_message = "empty required field `blurb`"
            break
        if not str(contents['blurb']):
            status_code = 1
            short_message = "invalid field `blurb`"
            break
        if not len(contents['blurb']) <= 100:
            status_code = 1
            short_message = "length of field `blurb` exceeds 100 characters"
            break

        # validate description
        field = 'description'
        if not 'description' in contents:
            status_code = 1
            short_message = "missing required field `description`"
            break
        if contents['description'] == '':
            status_code = 1
            short_message = "empty required field `description`"
            break
        if not str(contents['description']):
            status_code = 1
            short_message = "invalid field `description`"
            break
        if not len(contents['description']) <= 1000:
            status_code = 1
            short_message = "length of field `description` exceeds 1000 characters"
            break

        # validate url
        field = 'url'
        if not 'url' in contents:
            status_code = 1
            short_message = "missing required field `url`"
            break
        if contents['url'] == '':
            status_code = 1
            short_message = "empty required field `url`"
            break
        if not str(contents['url']):
            status_code = 1
            short_message = "invalid field `url`"
            break
        if 'github.com' in contents['url']:
            status_code = 1
            short_message = "URL provided for field `url` is a GitHub repository"
            long_message = f"It appears the URL for this contribution ({contents['url']}) is a GitHub repository, not a published web app. Unfortunately, Soliloquy is not a portfolio for source code. Please refer to the FAQs for more information: https://www.soliloquy.dev/about/"
            break
        try:
            response = requests.get(contents['url'], timeout=15)
            if not response:
                status_code = 1
                short_message = "URL provided for field `url` returned an error"
                long_message = f"The result of accessing \"{contents['url']}\" resulted in an HTTP response code of `{response.status_code}`, which is an error."
                break
        except Exception as e:
            status_code = 1
            short_message = "URL provided for field `url` is inaccessible"
            long_message = f"Trying to access \"{contents['url']}\" resulted in an unknown exception, likely indicating that the URL is invalid."
            break

        # validate img
        field = 'img'
        img = contents.get('img', '')
        if not img == '':
            reserved_file_names = ['about', 'default', 'willcarhartportfolio']
            supported_filetypes = ['.png', '.jpg', '.jpeg', '.gif']
            image_filename, filetype = os.path.splitext(img)
            if not filetype in supported_filetypes:
                status_code = 1
                short_message = f"Value of field `img` file type not supported (`{filetype}`)"
                long_message = "Supported image file types are `.png`, `.jpg`, `.jpeg`, and `.gif`."
                break
            if image_filename in reserved_file_names:
                status_code = 1
                short_message = f"filename provided for field `img` is prohibited (`{image_filename}`)"
                long_message = "Some filenames are reserved for the system, as they are used elsewhere in Soliloquy's assets and thus are prohibited for use in names of contribution images. These filenames are `about`, `default`, and `willcarhartportfolio`, extension agnostic."
                break
            if not os.path.isfile(f'{content_dir}/app_img/{img}'):
                status_code = 1
                short_message = f"filename provided for field `img` not found, no such file `{os.path.basename(content_dir)}/app_img/{img}`"
                long_message = f"Make sure to add your image file to `{os.path.basename(content_dir)}/app_img/`, as this is where Soliloquy will look for it."
                break

        # validate timestamp
        field = 'timestamp'
        if not 'timestamp' in contents:
            status_code = 1
            short_message = "missing required field `timestamp`"
            break
        if contents['timestamp'] == '':
            status_code = 1
            short_message = "empty required field `timestamp`"
            break
        if any(
                re.match(regex, contents['timestamp'])
                for regex in ['../../.*', '.*/../..', '..-..-.*', '.*-..-..']):
            status_code = 1
            short_message = "value provided for field `timestamp` is ambiguous"
            long_message = f"The timestamp you provided (`{contents['timestamp']}`) is ambiguous. This means that its value is not deterministic. For example, **\"05/06/2018\"** could be interpreted as **May 6th, 2018** or **June 5th, 2018**."
            break
        try:
            parse = parsedatetime.Calendar()
            time_struct, parse_status = parse.parse(value)
            if not parse_status == 1:
                status_code = 1
                short_message = f"invalid field `timestamp`"
                long_message = f"The provided timestamp `{contents['timestamp']}` could not be parsed. Try using the format **Month Day, Year**, like August 7th, 2019."
                break
            dt = datetime.datetime(*time_struct[:6])
            timestamp = dt.timestamp()
        except ValueError:
            status_code = 1
            short_message = "invalid field `timestamp`"
            long_message = f"The provided timestamp `{contents['timestamp']}` could not be parsed. Try using the format **Month Day, Year**, like August 7th, 2019."
            break

    return ContentError(status_code=status_code,
                        filename=os.path.basename(filename),
                        field=field,
                        short_message=short_message,
                        long_message=long_message)
Example #12
0
 def onLoad(self):
     """Create the natural-language date/time parser this skill uses."""
     calendar = parsedatetime.Calendar()
     self.cal = calendar
Example #13
0
 def __init__(self, name, emitter=None):
     """
     Initialize a skill that can schedule timed events.

     Args:
         name: Skill name, forwarded to the parent constructor.
         emitter: Optional message emitter, forwarded to the parent.
     """
     super(ScheduledSkill, self).__init__(name, emitter)
     # Timer handle; presumably populated later when an event is scheduled.
     self.timer = None
     # Natural-language date/time parser (parsedatetime).
     self.calendar = pdt.Calendar()
     # Language-specific time rules; self.lang is expected to be set by the
     # parent __init__ above, so this must run after the super() call.
     self.time_rules = time_rules.create(self.lang)
     self.init_format()
Example #14
0
#!/usr/bin/env python
# Time-parsing helpers: natural-language parsing via parsedatetime plus a
# list of explicit strptime formats.
import time, calendar, parsedatetime as pdt, pytz, re
from datetime import datetime
from time import mktime
from pytz import timezone
from dateutil import tz

c = pdt.Constants()
# NOTE(review): BirthdayEpoch appears to set parsedatetime's two-digit-year
# pivot (values >= 80 -> 19xx, below -> 20xx) — confirm against the
# parsedatetime documentation.
c.BirthdayEpoch = 80
# Shared calendar parser built from the constants above.
p = pdt.Calendar(c)
# Explicit day-first datetime formats; presumably tried before falling back
# to natural-language parsing elsewhere in this module.
dateFormats = [
    "%d/%m/%y-%H:%M:%S", "%d/%m/%y", "%d/%m/%Y", "%d/%m/%y-%H:%M",
    "%d/%m/%Y-%H:%M:%S", "%d/%m/%y-%H:%M", "%H:%M:%S"
]


#will try to match a time given in a form of hh:mm:ss from current time
def regexmatch(timestring):

    pattern = re.compile("^(?:(?:([0-9])*:)?([0-9]*?\d):)?([0-9]*\d)$")
    hours = mins = secs = 0
    if pattern.match(timestring):
        args = timestring.split(":")
        if len(args) == 3:
            hours = int(args[0])
            mins = int(args[1])
            secs = int(args[2])
        elif len(args) == 2:
            mins = int(args[0])
            secs = int(args[1])
        else:
Example #15
0
    def __init__(self, date_format=None, **kwargs):
        """
        Date field wrapper.

        Args:
            date_format: Optional explicit date format string; stored as-is.
            **kwargs: Forwarded unchanged to the parent constructor.
        """
        super(Date, self).__init__(**kwargs)
        # Natural-language fallback parser.
        self.parser = parsedatetime.Calendar()
        self.date_format = date_format
Example #16
0
 def setUp(self):
     """Create a default Calendar and snapshot the current local time."""
     self.cal = pdt.Calendar()
     (self.yr, self.mth, self.dy,
      self.hr, self.mn, self.sec,
      self.wd, self.yd, self.isdst) = time.localtime()
Example #17
0
 def parse(*args, **kwargs):
     """Parse a natural-language time string; return the time struct only."""
     parsed, _flag = parsedatetime.Calendar().parse(*args, **kwargs)
     return parsed
Example #18
0
    def __init__(self):
        """Build parsedatetime constants and a calendar that uses them."""
        constants = parsedatetime.Constants()
        self.date_constants = constants
        # The calendar shares the exact constants object stored above.
        self.date_calendar = parsedatetime.Calendar(constants)
Example #19
0
    def parse_other_answer_page(self, response):
        """
        Scrape one pagination page of Yahoo Answers answers.

        Yields one YahooItem per answer on the page, numbering answers from
        response.meta['ult_ans_id'], then follows the "next page" link (if
        any) with a request back into this method.

        NOTE(review): the encode("utf8").replace(...) chains imply this code
        targets Python 2 (bytes/str are interchangeable there); under
        Python 3 bytes.replace with a str argument raises TypeError.
        """
        # Date parser plus the output timestamp format for date_time fields.
        c = pdt.Constants()
        p = pdt.Calendar(c)
        f = '%Y-%m-%d %H:%M:%S'
        hxs = HtmlXPathSelector(response)
        all_answer = hxs.xpath('//ul[contains(@id,"ya-qn-answers")]/li')
        # Continue answer numbering from where the previous page stopped.
        current_ans_id = response.meta['ult_ans_id']
        for single_answer in all_answer:
            item = YahooItem()
            # Answer timestamp text; stripped of the middle-dot separator
            # before natural-language parsing.
            ans_data = single_answer.xpath(
                './/div[contains(@class,"Pt-15")]/span[contains(@class, "Clr-88")]'
            ).extract()
            data_string = html2text.html2text(ans_data[0])
            data_format = p.parseDT(
                str(
                    data_string.encode("utf8").replace("\xc2\xb7",
                                                       "").strip()))
            item['date_time'] = data_format[0].strftime(f)
            # uid is "<question id>.<answer ordinal>".
            item['uid'] = str(
                str(response.meta['quest_id']) + "." + str(current_ans_id))
            item['type'] = "answer"
            item['tags'] = "N/A"
            item['title'] = ""
            item['resolve'] = ""
            item['answers'] = 0
            item['views'] = 0
            # Answer author's display name; falls back to "anonymous".
            text_to_gain = single_answer.xpath(
                './/a[contains(@class,"uname Clr-b")]').extract()
            if text_to_gain:
                h = html2text.HTML2Text()
                h.ignore_links = True
                author_string = h.handle(text_to_gain[0])
                item['author'] = str(
                    author_string.encode('utf-8', 'ignore').strip())
            else:
                item['author'] = "anonymous"

            # Answers carry the question page's URL.
            item['url'] = response.url

            # Full answer text when present, otherwise empty.
            text_to_gain = single_answer.xpath(
                './/span[contains(@class,"ya-q-full-text")][@itemprop="text"]'
            ).extract()
            if text_to_gain:
                item['text'] = html2text.html2text(text_to_gain[0]).encode(
                    'utf-8', 'ignore')
            else:
                item['text'] = ""

            # Upvote count, defaulting to 0 when the element is missing.
            text_to_gain = single_answer.xpath(
                './/div[contains(@class,"D-ib Mend-10 Clr-93")]/div[1]/div[1]'
            ).extract()
            if text_to_gain:
                item['upvotes'] = int(html2text.html2text(text_to_gain[0]))
            else:
                item['upvotes'] = 0

            current_ans_id = current_ans_id + 1
            yield item

        # Follow pagination, carrying the question id and the next answer
        # ordinal in request.meta.
        try:
            if (hxs.xpath('//div[contains(@id, "ya-qn-pagination")]' +
                          '/a[contains(@class,"Clr-bl")][last()]/@href')):
                url_of_the_next_page = hxs.xpath(
                    '//div[contains(@id, "ya-qn-pagination")]' +
                    '/a[contains(@class,"Clr-bl")][last()]/@href').extract()
                next_page_composed = "https://hk.answers.yahoo.com" + \
                                     url_of_the_next_page[0]
                request = scrapy.Request(next_page_composed,
                                         callback=self.parse_other_answer_page)
                request.meta['quest_id'] = response.meta['quest_id']
                request.meta['ult_ans_id'] = current_ans_id
                yield request
        # NOTE(review): NoSuchElementException is a Selenium exception;
        # scrapy selectors do not raise it, so this handler likely never
        # fires — confirm the intended exception type.
        except NoSuchElementException:
            pass
    def process_url(self, url):
        """
        For the given url find or create an entry for each source in the database.

        Fetches the listing page, extracts one link per repository item,
        and for each link whose name is not already in the database parses
        a date out of the link text, fetches the item page to find the real
        download URL, and creates a Source row.

        If no sources found raise an exception (NoSourcesFoundError).
        """

        # httplib2 with an on-disk cache so repeated runs avoid re-fetching.
        h = httplib2.Http( settings.HTTPLIB2_CACHE_DIR )
        response, content = h.request(url)
        # print content

        # parse content (BeautifulSoup 3 API: convertEntities decodes HTML
        # entities in the parsed tree)
        soup = BeautifulSoup(
            content,
            convertEntities=BeautifulStoneSoup.HTML_ENTITIES
        )

        # Each repository item is marked with this class combination.
        spans = soup.findAll( 'span', 'contenttype-repositoryitem summary')

        links = [ span.a for span in spans ]

        # Check that we found some links. This is to detect when the page changes or our
        # scraper breaks (see issue #905 for example). Checking that the most recent
        # source is not more that X weeks old might also be a good idea, but could lead
        # to lots of false positives as there is often a long hiatus.
        if not len(links):
            raise NoSourcesFoundError()

        for link in links:

            # print '==============='
            # print link

            href = link['href'].strip()
            # print "href: " + href

            name = ' '.join(link.contents).strip()
            # print "name: " + name

            # Only process sources we have not already stored.
            if not Source.objects.filter(name=name).exists():

                cal = pdt.Calendar()
                # Sometimes the space is missing before the
                # month, so insert that if it appears to be missing:
                tidied_name = re.sub(r'(\d+(st|nd|rd|th))(?=[^ ])', '\\1 ', name)
                # Commas in the name confuse parsedatetime, so strip
                # them out too:
                tidied_name = re.sub(r',', '', tidied_name)
                # parseDateText returns a time-struct-like tuple; the first
                # three fields are year, month, day.
                result = cal.parseDateText(tidied_name)
                source_date = datetime.date(*result[:3])
                # print "source_date: " + str(source_date)


                # I don't trust that we can accurately create the download link url with the
                # details that we have. Instead fetch the page and extract the url.
                download_response, download_content = h.request(href)
                download_soup = BeautifulSoup(
                    download_content,
                    convertEntities=BeautifulStoneSoup.HTML_ENTITIES
                )
                download_div = download_soup.find( id="archetypes-fieldname-item_files" )
                if not download_div:
                    # Best-effort: warn and move on rather than aborting the
                    # whole run for one malformed item page.
                    warn("Failed to find the download div on {0}".format(href))
                    continue

                download_url = download_div.a['href']
                # print download_url

                # create the source entry
                Source.objects.create(
                    name = name,
                    url = download_url,
                    date = source_date,
                )
Example #21
0
    def parse_page(self, response):
        """
        Scrape a single Yahoo Answers question page.

        Yields a "question" YahooItem, then a "solution" item for the best
        answer (if present), then one "answer" item per remaining answer,
        and finally a pagination request when more than one answer page
        exists.

        NOTE(review): the encode/replace chains on scraped strings imply
        Python 2 semantics; under Python 3 several of these would raise.
        """
        # Time tools: parsedatetime parser and output timestamp format.
        c = pdt.Constants()
        p = pdt.Calendar(c)
        f = '%Y-%m-%d %H:%M:%S'
        now = datetime.datetime.now()
        # Start scraping a single question

        # Checking question category
        try:
            hxs = HtmlXPathSelector(response)
            category = hxs.xpath(
                '(//a[contains(@class,"Clr-b")])[2]').extract()
            h = html2text.HTML2Text()
            h.ignore_links = True
            category_text = h.handle(category[0])
            url_category = str(category_text).encode('utf8').strip()
        except IndexError:
            # Category element missing: the URL is presumably dead.
            print(str(self.uid) + "Warning: this Url is not more available...")
            url_category = "Error"

        # If the question is related to programming and design
        # start item creation process
        # (disabled category filter kept for reference):
        # if "程式編寫" and "設計" in url_category:
        if (True):
            # Increment the shared question id and copy it so concurrent
            # requests do not see a later value.
            self.uid = self.uid + 1
            uid_copy = self.uid

            # Print current uid every 100 questions as a progress indicator.
            if self.uid % 100 == 0:
                print(str(self.uid))
            # Initialize scrapy item
            item = YahooItem()
            # Read the date field associated with this URL if info data is
            # present in the pre-scraped URL list.
            for istance in self.url_to_scrape:
                if response.url == istance.url:
                    if istance.date == "not available":
                        item['date_time'] = "not available"
                        break
                    else:
                        # Strip the middle-dot separator before parsing.
                        data_format = p.parseDT(
                            str(
                                str(istance.date).replace("\xc2\xb7",
                                                          "").strip()))
                        item['date_time'] = data_format[0].strftime(f)
                        break
            item['type'] = "question"
            item['uid'] = uid_copy
            item['url'] = response.url
            item['tags'] = "N/A"
            item['views'] = 0
            item['upvotes'] = 0
            text_to_gain = hxs.xpath('//h1').extract()
            # Take title of the question
            item['title'] = (html2text.html2text(
                text_to_gain[0]).encode("utf8").strip())
            # Take text from the question: prefer the full-text span, fall
            # back to the short version.
            full_text_answer = hxs.xpath(
                '//span[contains(@class,"ya-q-full-text Ol-n")]').extract()
            if full_text_answer:
                item['text'] = html2text.html2text(full_text_answer[0]).encode(
                    'utf-8', 'ignore')
            else:
                text_to_gain = hxs.xpath(
                    '//span[contains(@class,"ya-q-text")]').extract()
                if text_to_gain:
                    item['text'] = html2text.html2text(text_to_gain[0]).encode(
                        'utf-8', 'ignore')
            # Take username of the questioner (profile image alt text).
            text_to_gain = hxs.xpath(
                '//div[contains(@id,"yq-question-detail-profile-img")]' +
                '/a/img/@alt').extract()
            if text_to_gain:
                try:
                    h = html2text.HTML2Text()
                    h.ignore_links = True
                    author_string = h.handle(text_to_gain[0])
                    item['author'] = author_string.encode('utf-8',
                                                          'ignore').strip()
                # Handle HTMLtoText failures by treating the author as
                # anonymous rather than dropping the item.
                except:
                    item['author'] = "anonymous"
            else:
                item['author'] = "anonymous"
            text_to_gain = hxs.xpath(
                '(//div[contains(@class,"Mend-10 Fz-13 Fw-n D-ib")])' +
                '[2]/span[2]').extract()
            # Read number of answers; the page shows either "N answers" or
            # "1 answer".
            if text_to_gain:
                if " answers" in (str(html2text.html2text(
                        text_to_gain[0])).strip()):
                    item['answers'] = int(
                        str(html2text.html2text(text_to_gain[0])).replace(
                            " answers", "").strip())
                else:
                    if " answer" in (str(html2text.html2text(
                            text_to_gain[0])).strip()):
                        item['answers'] = int(
                            str(html2text.html2text(text_to_gain[0])).replace(
                                " answer", "").strip())
            else:
                item['answers'] = 0
            # Check if question is closed (resolved with a best answer)
            text_to_gain = hxs.xpath(
                '//span[contains(@class,"ya-ba-title Fw-b")]/text()').extract(
                )
            if text_to_gain:
                item['resolve'] = "True"
            else:
                item['resolve'] = "False"

            # yield item for the question instance
            yield item

            # Taking the best answer if present

            if hxs.xpath('//div[contains(@id,"ya-best-answer")]'):
                ans_uid = 1
                item = YahooItem()
                ans_data = hxs.xpath(
                    '(//div[contains(@class,"Pt-15")]/' +
                    'span[contains(@class, "Clr-88")])[1]').extract()
                data_string = html2text.html2text(ans_data[0]).strip()
                data_format = p.parseDT(
                    str(
                        data_string.encode("utf8").replace("\xc2\xb7",
                                                           "").strip()))
                item['date_time'] = data_format[0].strftime(f)
                # uid is "<question id>.<answer ordinal>".
                item['uid'] = str(str(uid_copy) + ("." + str(ans_uid)))
                item['type'] = "answer"
                item['resolve'] = "solution"
                item['tags'] = "N/A"
                item['title'] = ""
                item['answers'] = 0
                item['views'] = 0
                best_text = hxs.xpath(
                    '(//span[contains(@class,"ya-q-full-text")])[1]').extract(
                    )
                item['text'] = html2text.html2text(best_text[0]).encode(
                    'utf-8', 'ignore')
                text_to_gain = hxs.xpath(
                    '(//a[contains(@class,"uname Clr-b")])[1]').extract()
                if text_to_gain:
                    h = html2text.HTML2Text()
                    h.ignore_links = True
                    author_string = h.handle(text_to_gain[0])
                    item['author'] = str(
                        author_string.encode('utf-8', 'ignore').strip())
                else:
                    item['author'] = "anonymous"
                upvote_text = hxs.xpath(
                    '(//div[contains(@class,"D-ib Mstart-23 count")])[1]/text()'
                ).extract()
                item['upvotes'] = int(
                    str(html2text.html2text(upvote_text[0])).strip())
                item['url'] = response.url
                ans_uid = ans_uid + 1
                yield item

            else:
                ans_uid = 1

            # Taking all the other answers
            all_answer = hxs.xpath('//ul[contains(@id,"ya-qn-answers")]/li')
            for single_answer in all_answer:
                item = YahooItem()
                # In this case data is always present
                ans_data = single_answer.xpath(
                    './/div[contains(@class,"Pt-15")]/span[contains(@class, "Clr-88")]'
                ).extract()
                data_string = html2text.html2text(ans_data[0])
                data_format = p.parseDT(
                    str(
                        data_string.encode("utf8").replace("\xc2\xb7",
                                                           "").strip()))
                item['date_time'] = data_format[0].strftime(f)
                item['uid'] = str(str(uid_copy) + ("." + str(ans_uid)))
                item['tags'] = "N/A"
                item['title'] = ""
                item['answers'] = 0
                item['views'] = 0
                item['type'] = "answer"
                item['resolve'] = ""
                text_to_gain = single_answer.xpath(
                    './/a[contains(@class,"uname Clr-b")]').extract()
                if text_to_gain:
                    h = html2text.HTML2Text()
                    h.ignore_links = True
                    author_string = h.handle(text_to_gain[0])
                    item['author'] = str(
                        author_string.encode('utf-8', 'ignore'))
                else:
                    item['author'] = "anonymous"
                # Take the question's URL because answers have no URL ref
                item['url'] = response.url
                # Check if the long text version of the answer is present
                text_to_gain = single_answer.xpath(
                    './/span[contains(@class,"ya-q-full-text")][@itemprop="text"]'
                ).extract()
                if text_to_gain:
                    item['text'] = html2text.html2text(text_to_gain[0]).encode(
                        'utf-8', 'ignore')
                else:
                    item['text'] = ""

                text_to_gain = single_answer.xpath(
                    './/div[contains(@class,"D-ib Mend-10 Clr-93")]/div[1]/div[1]'
                ).extract()
                if text_to_gain:
                    item['upvotes'] = int(
                        str(html2text.html2text(text_to_gain[0])).strip())
                else:
                    item['upvotes'] = 0

                ans_uid = ans_uid + 1
                yield item
            # Checking if there are more than 10 answers:
            # in this case there are other answers on another page
            try:
                if (hxs.xpath('//div[contains(@id, "ya-qn-pagination")]' +
                              '/a[contains(@class,"Clr-bl")][last()]/@href')):
                    url_of_the_next_page = hxs.xpath(
                        '//div[contains(@id, "ya-qn-pagination")]' +
                        '/a[contains(@class,"Clr-bl")][last()]/@href').extract(
                        )
                    next_page_composed = "https://hk.answers.yahoo.com" + \
                                         url_of_the_next_page[0]
                    # Go to the next page and take more urls
                    # passing uid as parameter
                    request = scrapy.Request(
                        next_page_composed,
                        meta={'ans_id': uid_copy},
                        callback=self.parse_other_answer_page)
                    request.meta['quest_id'] = uid_copy
                    request.meta['ult_ans_id'] = ans_uid
                    yield request
            # NOTE(review): NoSuchElementException is a Selenium exception;
            # scrapy selectors do not raise it — confirm intended type.
            except NoSuchElementException:
                pass
        else:
            # Unreachable while the category filter above is `if (True)`.
            print(str(self.uid) + " question not available or not related")
            print(str(response.url))
    def setUp(self):
        """Build an en_AU calendar (PyICU disabled) and snapshot local time."""
        locale_constants = pdt.Constants('en_AU', usePyICU=False)
        self.ptc = locale_constants
        # The calendar shares the exact constants object stored above.
        self.cal = pdt.Calendar(locale_constants)

        (self.yr, self.mth, self.dy, self.hr, self.mn,
         self.sec, self.wd, self.yd, self.isdst) = time.localtime()
Example #23
0
def parse_time(time_str):
    """Parse a natural-language time string into a datetime."""
    struct, _status = parsedatetime.Calendar().parse(time_str)
    return datetime(*struct[:6])