def get_new_ephemeris(targetid, verbose=True):
    # read in the new ephemeris provided by Joel Hartman
    ephem_path = (os.path.join(
        DATADIR, 'updated_ephemerides/{}/{}.updateephem.txt'.format(
            today_YYYYMMDD(), targetid)))

    with open(ephem_path, 'r') as f:
        lines = f.readlines()

    epoch = [l for l in lines if '- Epoch: ' in l][0]
    period = [l for l in lines if '- Period: ' in l][0]
    dur = [l for l in lines if '- Transit duration: ' in l][0]

    if verbose:
        print(epoch, period, dur)

    epoch = float(search('{} - Epoch: {} +/- {}', epoch)[1].strip())
    period = float(search('{} - Period: {} +/- {}', period)[1].strip())
    dur = float(search('{} - Transit duration: {} +/- {}', dur)[1].strip())

    if verbose:
        print(epoch, period, dur)

    ephem_dict = {'period': period, 'epoch': epoch, 'duration': dur}

    return ephem_dict
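A minimal sketch of what the search calls above return, using an invented ephemeris line (the real file layout may differ): anonymous {} fields are addressed by position, so [1] is the value between the label and the "+/-".

from parse import search

line = "Transit #3 - Epoch: 2458687.123456 +/- 0.000321"
result = search('{} - Epoch: {} +/- {}', line)
# result[1] holds the middle anonymous field as a string
print(float(result[1].strip()))  # 2458687.123456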
Example #2
def resolve_git_shortcut(git_shortcut):
    result = parse.parse("{:w}/{:w}#{:w}", git_shortcut)
    if not result:
        result = parse.parse("{:w}/{:w}", git_shortcut)
    if not result:
        return False

    username = result.fixed[0]
    project = result.fixed[1]

    git_tag = None
    if len(result.fixed) > 2:
        git_tag = result.fixed[2]

    r = requests.get(
        "https://raw.githubusercontent.com/{0}/{1}/master/setup.py".format(
            username, project))
    if r.status_code == 404:
        return False

    result = parse.search("name='{}'", r.content)
    result2 = parse.search('name="{}"', r.content)
    if result:
        egg_name = result.fixed[0]
    elif result2:
        egg_name = result2.fixed[0]
    else:
        egg_name = project

    if git_tag:
        return "git+https://github.com/{0}/{1}.git@{2}#egg={3}".format(
            username, project, git_tag, egg_name)
    else:
        return "git+https://github.com/{0}/{1}.git#egg={2}".format(
            username, project, egg_name)
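A quick sketch of the two shortcut forms handled above; the shortcut strings are invented. parse.parse must consume the whole string, so the "#tag" form only matches the three-field pattern, and the length of result.fixed tells the two cases apart.

import parse

full = parse.parse("{:w}/{:w}#{:w}", "someuser/someproject#v1_0")
print(full.fixed)        # ('someuser', 'someproject', 'v1_0')

short = parse.parse("{:w}/{:w}", "someuser/someproject")
print(short.fixed)       # ('someuser', 'someproject')
print(len(short.fixed))  # 2, so no git tag is extracted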
Example #3
def publish(logline, rootname, cache):
    global face, keychain
    # Pull out and parse datetime for log entry
    # (note we should use point time for timestamp)
    try:
        if not ": (point" in logline: return
        logdtstr = parse.search("[{}]", logline)[0]
        point = parse.search("(point {})", logline)[0].split(" ")
    except Exception as detail:
        print("publish: Parse error for", logline, "-", detail)
        return
    try:
        logdt = datetime.strptime(logdtstr, "%Y-%m-%d %H:%M:%S.%f")
    except Exception as detail:
        print("publish: Date/time conversion error for", logline, "-", detail)
        return

    name = pointNameToName(point[0], rootname)
    data_json, data_dict = pointToJSON(point)

    if name is not None:
        print("Publishing log entry", logdt, "to", name,
              data_dict["timestamp"], "payload:", data_json)
        try:
            cache.add(createData(name, data_dict["timestamp"], data_json))
        except Exception as detail:
            print("publish: Error calling createData for", logline, "-",
                  detail)
Example #4
	def parse_region(self, region):
		region = "".join(region.split())
		try:
			name = parse.search(";@{:w}:", region).fixed[0]
			reg = parse.search(":({:d},{:d})", region).fixed
			self.ig_regions[name] = reg
		except (AttributeError, IndexError):
			pass
Example #5
	def parse_additional(self, additional):
		additional = "".join(additional.split())
		try:
			name = parse.search(";+{:w}:", additional).fixed[0]
			add = parse.search(":{:w}", additional).fixed[0]
			self.ig_additional[name] = add
		except (AttributeError, IndexError):
			pass
Example #6
def signal_params(filename):
    "Extract signal parameter values from filename"
    yuk = search("TopYuk_{:f}", filename).fixed[0]
    lam = search("SlfCoup_{:f}", filename)
    if lam is None:
        lam = -search("SlfCoup_m{:f}", filename).fixed[0]
    else:
        lam = lam.fixed[0]
    return (yuk, lam)
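A hedged example of the naming convention this helper appears to expect (the filename is invented): the :f field cannot start at the 'm', so the first SlfCoup search returns None and the sign is applied by hand.

from parse import search

fname = "TopYuk_1.0_SlfCoup_m2.5_ntuple.root"
print(search("TopYuk_{:f}", fname).fixed[0])     # 1.0
print(search("SlfCoup_{:f}", fname))             # None, value is written as m2.5
print(-search("SlfCoup_m{:f}", fname).fixed[0])  # -2.5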
Example #7
 def find_person(self):
   """
   Load committee details for the given detail page URL or numeric ID
   """
   # Read either person_id or committee_url from the opposite
   user_overview_url = self.urls['PERSON_OVERVIEW_PRINT_PATTERN'] % self.config['scraper']['base_url']
   logging.info("Getting user overview from %s", user_overview_url)
   
   time.sleep(self.config['scraper']['wait_time'])
   response = self.get_url(user_overview_url)
   if not response:
     return
   
   # seek(0) is necessary to reset response pointer.
   response.seek(0)
   html = response.read()
   html = html.replace('&nbsp;', ' ')
   parser = etree.HTMLParser()
   dom = etree.parse(StringIO(html), parser)
   
   trs = dom.xpath(self.xpath['PERSONLIST_LINES'])
   for tr in trs:
     current_person = None
     link = tr.xpath('.//a')
     if len(link):
       parsed = parse.search(self.urls['PERSON_DETAIL_PARSE_PATTERN'], link[0].get('href'))
       if not parsed:
         parsed = parse.search(self.urls['PERSON_DETAIL_PARSE_PATTERN_ALT'], link[0].get('href'))
       if parsed:
         person_id = parsed['person_id']
         current_person = Person(originalId=person_id)
     if current_person:
       tds = tr.xpath('.//td')
       if len(tds):
         if len(tds[0]):
           person_name = tds[0][0].text.strip()
           if person_name:
             current_person.name = person_name
       if len(tds) > 1:
         person_party = tds[1].text.strip()
         if person_party:
           for party_alias in self.config['scraper']['party_alias']:
             if party_alias[0] == person_party:
               person_party = party_alias[1]
               break
           new_organization = Organization(originalId=person_party,
                                           name=person_party,
                                           classification='party')
           new_membership = Membership(originalId=unicode(person_id) + '-' + person_party,
                                       organization=new_organization)
           current_person.membership = [new_membership]
       if current_person:
         if hasattr(self, 'person_queue'):
           self.person_queue.add(current_person.originalId)
         self.db.save_person(current_person)
   return
Example #8
 def __init__(self, string):
     if string is not None:
         a = parse.search("items={page:d}-{limit:d}", string)
         b = parse.search("items={page:d}-", string)
         c = parse.search("items=-{limit:d}", string)
         s = a or b or c
         if s:
             s = s.named
             self.page = s.get('page', None)
             self.limit = s.get('limit', None)
Example #9
 def __init__(self, data):
     """Initialize Mode from xrandr data."""
     self.data = data
     self.header = data[0]
     self.name = parse.search("({mode_name})", self.header)["mode_name"]
     self.res_x = parse.search("h: width{:s}{res_x:d}", data[1])["res_x"]
     self.res_y = parse.search("v: height{:s}{res_y:d}", data[2])["res_y"]
     self.refresh = parse.search("{refresh:f}Hz", data[2])["refresh"]
     self.preferred = "+preferred" in self.header
     self.current = "*current" in self.header
Example #10
 def parseError(self, errorDesc):
     parsed = search("MapperParsingException[{}[{field_name}]{}", errorDesc)
     if not parsed:
         parsed = search("MapperParsingException[{}[{}]{}[{field_name}]{}", errorDesc)
     if not parsed:
         parsed = search("{}MapperParsingException[{}[{field_name}]{}", errorDesc)
     if not parsed:
         parsed = search("{}MapperParsingException[{}[{}]{}[{field_name}]{}", errorDesc)
     LOG.info("Parsed ES Error: %s from description %s", parsed, errorDesc)
     if parsed and parsed.named:
         return parsed.named
     LOG.warning("Couldn't parse ES error: %s", errorDesc)
     return None
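A rough illustration of the first pattern above; the error text is invented to resemble an Elasticsearch mapper error rather than copied from one. The unnamed {} fields land in .fixed, while the named field is all that .named reports.

from parse import search

err = "MapperParsingException[failed to parse [user.age]]; nested: NumberFormatException;"
print(search("MapperParsingException[{}[{field_name}]{}", err).named)
# {'field_name': 'user.age'}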
Example #11
    def get_end_time_from_file(self):
        """
        Get first and last file of list and set end time from file name

        :returns: list of begin and end time as string
        """

        pattern = 'TMP_TGL_2m_{}_allmembers.grib2'

        begin = search(pattern, self.file_list[0])[0]
        end = search(pattern, self.file_list[-1])[0]

        return begin, end
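Behaviour sketch only; the member filename is assumed rather than taken from a real archive listing. search returns the text captured by the single anonymous field.

from parse import search

pattern = 'TMP_TGL_2m_{}_allmembers.grib2'
fname = 'TMP_TGL_2m_2023010100_allmembers.grib2'
print(search(pattern, fname)[0])  # '2023010100'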
Example #12
 def find_person(self):
   """
   Load committee details for the given detail page URL or numeric ID
   """
   # Read either person_id or committee_url from the opposite
   user_overview_url = self.urls['PERSON_OVERVIEW_PRINT_PATTERN']
   logging.info("Getting user overview from %s", user_overview_url)
   
   time.sleep(self.config.WAIT_TIME)
   response = self.get_url(user_overview_url)
   if not response:
     return
   
   # seek(0) is necessary to reset response pointer.
   response.seek(0)
   html = response.read()
   html = html.replace('&nbsp;', ' ')
   parser = etree.HTMLParser()
   dom = etree.parse(StringIO(html), parser)
   
   trs = dom.xpath(self.xpath['PERSONLIST_LINES'])
   for tr in trs:
     current_person = None
     link = tr.xpath('.//a')
     if len(link):
       parsed = parse.search(self.urls['PERSON_DETAIL_PARSE_PATTERN'], link[0].get('href'))
       if not parsed:
         parsed = parse.search(self.urls['PERSON_DETAIL_PARSE_PATTERN_ALT'], link[0].get('href'))
       if parsed:
         person_id = parsed['person_id']
         current_person = Person(numeric_id=person_id)
     if current_person:
       tds = tr.xpath('.//td')
       if len(tds):
         if len(tds[0]):
           person_name = tds[0][0].text.strip()
           if person_name:
             current_person.title = person_name
       if len(tds) > 1:
         person_party = tds[1].text.strip()
         if person_party:
           if person_party in self.config.PARTY_ALIAS:
             person_party = self.config.PARTY_ALIAS[person_party]
           current_person.committee = [{'committee': Committee(identifier=person_party, title=person_party, type='party')}]
       if current_person:
         if hasattr(self, 'person_queue'):
           self.person_queue.add(current_person.numeric_id)
         self.db.save_person(current_person)
   return
Example #13
 def parseLine(self, line):
     try:
         if not ": (point" in line: return
         dateTimeStr = parse.search("[{}]", line)[0]
         point = parse.search("(point {})", line)[0].split(" ")
     except Exception as detail:
         print("publish: Parse error for", line, "-", detail)
         return
     try:
         dateTime = datetime.strptime(dateTimeStr, "%Y-%m-%d %H:%M:%S.%f")
     except Exception as detail:
         print("publish: Date/time conversion error for", line, "-", detail)
         return
         
     self.pointNameToNDNName(point[0])
Example #14
    def parseLine(self, line):
        try:
            if not ": (point" in line: return
            dateTimeStr = parse.search("[{}]", line)[0]
            point = parse.search("(point {})", line)[0].split(" ")
        except Exception as detail:
            print("publish: Parse error for", line, "-", detail)
            return
        try:
            dateTime = datetime.strptime(dateTimeStr, "%Y-%m-%d %H:%M:%S.%f")
        except Exception as detail:
            print("publish: Date/time conversion error for", line, "-", detail)
            return

        self.pointNameToNDNName(point[0])
Example #15
def parser(data):
    pattern = "{op} {ax} {val:d}"
    match = parse.search(pattern, data)
    if match:
        return match.named

    pattern = "{op} {ax} {val}"
    match = parse.search(pattern, data)
    if match:
        return match.named

    pattern = "{op} {ax}"
    match = parse.search(pattern, data)
    if match:
        return match.named
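A minimal check of the first, fully typed pattern above (the instruction line is invented): with {val:d} the capture is converted to an int before it lands in .named.

import parse

match = parse.search("{op} {ax} {val:d}", "rotate X 90")
print(match.named)  # {'op': 'rotate', 'ax': 'X', 'val': 90}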
Example #16
def pick(token, input_msg):
    result = search(token, input_msg)
    if result is not None:
        result, = result.fixed
        return result
    else:
        return None
Example #17
async def server_countdown_close_connection_in_middle(ws, path):
    await WebSocketServerHelper.send_connection_ack(ws)

    result = await ws.recv()
    json_result = json.loads(result)
    assert json_result["type"] == "start"
    payload = json_result["payload"]
    query = payload["query"]
    query_id = json_result["id"]

    count_found = search("count: {:d}", query)
    count = count_found[0]
    stopping_before = count // 2
    print(
        f"Countdown started from: {count}, stopping server before {stopping_before}"
    )
    for number in range(count, stopping_before, -1):
        await ws.send(
            countdown_server_answer.format(query_id=query_id, number=number))
        await asyncio.sleep(2 * MS)

    print("Closing server while subscription is still running now")
    await ws.close()
    await ws.wait_closed()
    print("Server is now closed")
Example #18
    def get_submission(self, submission_url=None, submission_id=None):
        """
        Load submission (Vorlage) details for the submission given by detail page URL
        or numeric ID
        """
        # Read either submission_id or submission_url from the opposite
        if submission_id is not None:
            submission_url = self.urls[
                'SUBMISSION_DETAIL_PRINT_PATTERN'] % submission_id
        elif submission_url is not None:
            parsed = parse.search(self.urls['SUBMISSION_DETAIL_PARSE_PATTERN'],
                                  submission_url)
            submission_id = parsed['submission_id']

        logging.info("Getting submission %d from %s", submission_id,
                     submission_url)

        submission = Submission(numeric_id=submission_id)

        time.sleep(self.config.WAIT_TIME)
        try:
            response = self.user_agent.open(submission_url)
        except urllib2.HTTPError, e:
            if e.code == 404:
                sys.stderr.write(
                    "URL not found (HTTP 404) error caught: %s\n" %
                    submission_url)
                sys.stderr.write(
                    "Please check BASE_URL in your configuration.\n")
                sys.exit(1)
Example #19
    def find_sessions(self, start_date=None, end_date=None):
        """
        Find sessions within a given time frame and add them to the session queue.
        """
        # list of (year, month) tuples to work from
        start_month = start_date.month
        end_months = (end_date.year - start_date.year) * 12 + end_date.month + 1
        monthlist = [(yr, mn) for (yr, mn) in (
            ((m - 1) / 12 + start_date.year, (m - 1) % 12 + 1) for m in range(start_month, end_months)
        )]

        for (year, month) in monthlist:
            url = self.urls['CALENDAR_MONTH_PRINT_PATTERN'] % (year, month)
            logging.info("Looking for sessions in %04d-%02d at %s", year, month, url)
            time.sleep(self.config.WAIT_TIME)
            response = self.user_agent.open(url)
            html = response.read()
            html = html.replace('&nbsp;', ' ')
            parser = etree.HTMLParser()
            dom = etree.parse(StringIO(html), parser)
            found = 0
            for link in dom.xpath('//a'):
                href = link.get('href')
                if href is None:
                    continue
                parsed = parse.search(self.urls['SESSION_DETAIL_PARSE_PATTERN'], href)
                if hasattr(self, 'session_queue') and parsed is not None:
                    self.session_queue.add(int(parsed['session_id']))
                    found += 1
            if found == 0:
                logging.info("No sessions found for month %04d-%02d", year, month)
                if self.options.verbose:
                    print "No sessions found for month %04d-%02d\n" % (year, month)
Example #20
 def get_resource(self, path: str) -> ResourceABC:
     result = parse.search(self.config.pattern, path)
     if result:
         canonical = self.config.pattern.format(**result.named)
         if path.startswith(canonical):
             return self.create_resource(canonical, **result.named)
     return None
Example #21
 def _get_session_name(self, f: Path):
     if self.filename_format is not None and fnmatch(
             self.filename_format, "*{session*}*"):
         sess_name = search(self.filename_format, f.name)['session']
     else:
         sess_name = f.name
     return sess_name
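A toy illustration with a hypothetical filename format (the real one comes from self.filename_format): fnmatch only checks that the format names a session field, and parse.search then pulls it out of the actual file name.

from fnmatch import fnmatch
from parse import search

filename_format = "sub-{subject}_ses-{session}_task-{task}.edf"
print(fnmatch(filename_format, "*{session*}*"))                           # True
print(search(filename_format, "sub-01_ses-02_task-rest.edf")['session'])  # '02'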
Example #22
def test_change_password(logged_in, mailoutbox, settings):
    payload = dict(
        old_password='******',
        new_password='******',
    )

    response = logged_in.client.patch(
        reverse('users:change_password'),
        json.dumps(payload),
        content_type='application/json',
    )

    assert response.status_code == 200

    assert len(mailoutbox) == 1

    msg = mailoutbox[0]

    assert msg.subject == 'Your {site_name} password has been changed'.format(
        site_name=settings.SITE_NAME)
    assert msg.to == [logged_in.user.email]

    match = parse.search(
        '''Your password has been changed!''',
        msg.body,
    )
    assert match is not None
Example #23
def lookup_opcode(asm_line):
    for i in INSTRUCTIONS_TABLE.values():
        if parse.search(i.asm_exp, asm_line) \
                and asm_line.split()[0] == i.asm_exp.split()[0] \
                and len(asm_line.split()) == len(i.asm_exp.split()):
            return i.get_opcode(asm_line)
    return None
Example #24
def skip_page(markdown: str) -> bool:
    template = "git-snippet: {action:S}"
    result = parse.search(template, markdown)
    if result:
        if result["action"] == "enable":
            return False
    return True
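A small sketch of the :S (non-whitespace) field; the markdown line is invented.

import parse

result = parse.search("git-snippet: {action:S}", "<!-- git-snippet: enable -->")
print(result["action"])  # 'enable'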
Example #25
    def get_end_time_from_file(self):
        """
        Get last file of list and set end time from file name

        :returns: list of begin and end time as string
        """

        pattern = 'ps10km_{}_000.grib2'

        begin = search(pattern, self.file_list[0])[0]
        begin = datetime.strptime(begin, '%Y%m%d%H').strftime('%Y-%m-%dT%HZ')

        end = search(pattern, self.file_list[-1])[0]
        end = datetime.strptime(end, '%Y%m%d%H').strftime('%Y-%m-%dT%HZ')

        return [begin, end]
Example #26
    def _update_next_area(self):
        """
        Update the area after the new one.

        Return the unlock link for the new area.
        """
        # Some have this phrase in its own sentence, others don't
        link = u'nlocked once you have completed %s in [[%s]].'
        if self.new_number not in self.areas_list:
            # New area is the last one
            return link % (u'some job', self.after)
        page = pywikibot.Page(pywikibot.Site(),
                              self.areas_list[self.new_number])
        old_text = page.get()
        # Also replace the area number while we're there
        i = self.new_number + 1
        text = old_text.replace(number_map[i], number_map[i+1])
        job = parse.search(link % (u'{}', self.after),
                           old_text).fixed[0]
        old_link = link % (job, self.after)
        text = text.replace(old_link,
                            link % (u'some job', self.area_name))
        self._update_page(page, old_text, text)
        # Return the link so it can go in the new page
        return old_link
Example #27
 def search_for_element(self, response, resp_param):
     text = response.text
     # search for resp_param within the response body text
     param = search(resp_param, text)
     # pick out the required element
     element = param.fixed[0]
     return element
Example #28
async def server_countdown(ws, path):
    import websockets
    from .conftest import MS, PhoenixChannelServerHelper

    try:
        await PhoenixChannelServerHelper.send_connection_ack(ws)

        result = await ws.recv()
        json_result = json.loads(result)
        assert json_result["event"] == "doc"
        payload = json_result["payload"]
        query = payload["query"]
        query_id = json_result["ref"]

        count_found = search("count: {:d}", query)
        count = count_found[0]
        print(f"Countdown started from: {count}")

        await ws.send(subscription_server_answer)

        async def counting_coro():
            for number in range(count, -1, -1):
                await ws.send(
                    countdown_server_answer.format(query_id=query_id, number=number)
                )
                await asyncio.sleep(2 * MS)

        counting_task = asyncio.ensure_future(counting_coro())

        async def stopping_coro():
            nonlocal counting_task
            while True:

                result = await ws.recv()
                json_result = json.loads(result)

                if json_result["type"] == "stop" and json_result["id"] == str(query_id):
                    print("Cancelling counting task now")
                    counting_task.cancel()

        stopping_task = asyncio.ensure_future(stopping_coro())

        try:
            await counting_task
        except asyncio.CancelledError:
            print("Now counting task is cancelled")

        stopping_task.cancel()

        try:
            await stopping_task
        except asyncio.CancelledError:
            print("Now stopping task is cancelled")

        await PhoenixChannelServerHelper.send_close(ws)
    except websockets.exceptions.ConnectionClosedOK:
        pass
    finally:
        await ws.wait_closed()
Example #29
def _get_failure_info(stdout, vcd_path):
    waveform = _render_vcd(vcd_path)
    search_format = 'Assert failed in top: {}:{linenumber:d}'
    line_num = search(search_format, stdout)['linenumber']
    step_format = 'Checking assertions in step {step_num:d}..'
    steps = findall(step_format, stdout)
    step_num = list(steps)[-1]['step_num']
    return BMC_Result(Result.FAIL, stdout, waveform, step_num, line_num)
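Sketch of how search and findall cooperate here; the solver output below is fabricated to mimic the two format strings, not taken from a real run.

from parse import findall, search

stdout = (
    "Checking assertions in step 1..\n"
    "Checking assertions in step 2..\n"
    "Assert failed in top: counter.sv:42\n"
)
print(search('Assert failed in top: {}:{linenumber:d}', stdout)['linenumber'])  # 42
steps = list(findall('Checking assertions in step {step_num:d}..', stdout))
print(steps[-1]['step_num'])  # 2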
Example #30
def readccbbm_using_parse(filename):

    fid = open(filename, 'rt')
    lines = fid.readlines()
    regex = r'[-+]?[0-9]*\.?[0-9]+(?:[eE][-+]?[0-9]+)?'

    coords = []

    # Skip all lines until vertex
    ctr = 0
    parser1 = parse.compile(
        "Vertex {:d} {:g} {:g} {:g} {Jfeature=({:g} {:g} {:g} {:g} {:g} {:g} {:g})}"
    )
    parser2 = parse.compile("Vertex {:d} {:g} {:g} {:g}")

    while lines[ctr][0:6] != 'Vertex':
        ctr += 1

    # First read the vertices
    line = lines[ctr]
    line_split = line.split()
    ctr += 1
    # ctr = 1;
    attributes = []
    while line_split[0] == 'Vertex':
        result = parser1.parse(line)
        if result is not None:
            (idx, vtx1, vtx2, vtx3, radial_dist, mTBM1, mTBM2, mTBM3,
             detJacobian, eig1Jacobian, eig2Jacobian) = result.fixed
            attributes.append(radial_dist)
        else:
            result = parser2.parse(line)
            if result is not None:
                (idx, vtx1, vtx2, vtx3) = result.fixed
            else:
                sys.stdout.write('Cannot parse the line ' + line)

        coords.append([vtx1, vtx2, vtx3])
        line = lines[ctr]
        line_split = line.split()
        ctr += 1

    coords = np.array(coords)
    # The rest of the lines are faces
    faces = []
    ctr -= 1
    for ii in range(ctr, len(lines)):
        line = lines[ii]
        result = parse.search("Face {:d} {:d} {:d} {:d}", line)
        (idx, face1, face2, face3) = result.fixed
        faces.append([face1, face2, face3])

    faces = np.array(faces)
    if faces.min() == 1:
        faces -= 1

    isMultilevelUCF = False
    return coords, faces, attributes, isMultilevelUCF
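Compiling the pattern once, as above, is mainly a small optimisation; the compiled Parser exposes the same parse()/search() calls. The vertex line is invented.

import parse

vertex_parser = parse.compile("Vertex {:d} {:g} {:g} {:g}")
line = "Vertex 12 0.5 1.25 3.0"
print(vertex_parser.parse(line).fixed)  # (12, 0.5, 1.25, 3.0)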
Example #31
    def prepare_data_pack(info_bundle: InfoBundle) -> DataPack:
        date = info_bundle.date
        daily_cases = NumberParser.int_with_space(
            search(DataMiner.patterns["daily_cases"],
                   info_bundle.text)["cases"])
        daily_deaths = NumberParser.int_with_space(
            search(DataMiner.patterns["daily_deaths_direct"], info_bundle.text)["deaths"]) + \
            NumberParser.int_with_space(
            search(DataMiner.patterns["daily_deaths_linked"], info_bundle.text)["deaths"])
        daily_tests = NumberParser.int_with_modifier(
            search(DataMiner.patterns["daily_tests"], info_bundle.text)[0])
        total_cases = NumberParser.int_with_space(
            search(DataMiner.patterns["totals"], info_bundle.text)["cases"])
        total_deaths = NumberParser.int_with_space(
            search(DataMiner.patterns["totals"], info_bundle.text)["deaths"])

        voivodeship_stats = {}
        for v in DataMiner.voivodeships:
            v_cases = 0
            v_match = search(" " + v + "go ({})", info_bundle.text)
            if v_match:
                v_cases = v_match[0]
                v_cases = NumberParser.int_with_space(v_cases)
            voivodeship_stats[v.capitalize()] = {
                "date": date,
                "daily infected": v_cases
            }

        return DataPack(date, daily_cases, daily_deaths, daily_tests,
                        total_cases, total_deaths, voivodeship_stats)
Example #32
	def parse_header(self, header):
		header = "".join(header.split())
		try:
			self.ig_name = parse.search(">{:S}@", header).fixed[0]
		except (AttributeError, IndexError):
			self.ig_name = "Unknown"
		self.ig_type = _parse_header_helper("type", header)
		self.ig_alpha = _parse_header_helper("alpha", header)
		self.ig_specie = _parse_header_helper("specie", header)
Example #33
def parse_download_fname(fname):

    # Use Parse to attempt to parse filenames for metadata.
    r = parse.search('{name}-{version}.tar', fname)
    if not r:
        r = parse.search('{name}-{version}.zip', fname)
    if not r:
        r = parse.parse('{name}-{version}-{extra}.{ext}', fname)

    version = r['version']

    # Support for requirements-parser-0.1.0.tar.gz
    # TODO: Some versions might actually have dashes, will need to figure that out.
    # Will likely have to check of '-' comes at beginning or end of version.
    if '-' in version:
        version = version.split('-')[-1]

    return version
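A quick sanity check of the dash handling discussed above, with an illustrative filename: the lazy {name} field stops at the first dash, so any remaining dashes end up in version and the split keeps only the trailing numeric part.

import parse

r = parse.search('{name}-{version}.tar', 'requirements-parser-0.1.0.tar.gz')
print(r['version'])                 # 'parser-0.1.0'
print(r['version'].split('-')[-1])  # '0.1.0'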
Example #34
 def available_dates(self):
     result = subprocess.run(['s3cmd', 'ls', f's3://{self.bucket}/'],
                             capture_output=True)
     dates = set()
     for line in result.stdout.decode().splitlines():
         res = parse.search(f's3://{self.bucket}/' + self.pattern, line)
         if res is not None:
             dates.add(str(res.named['date']))
     return dates
Example #35
 def __init__(self,logbookfd):
     self.fileContents = logbookfd.read()
     self.rawLogEntries = list(r.fixed[0] for r in
             parse.findall("+++Begin log entry+++{}"
                 + "+++End log entry+++", self.fileContents))
     self.logEntries = []
     for entry in self.rawLogEntries:
         timestamp = parse.search("Time:{i}\n", entry)['i']
         user = parse.search("User:{i}\n", entry)['i']
         note = parse.search("Note:{i}\n", entry)['i']
         tags = list(r.fixed[0] for r in
                 parse.findall("\'+{}\'", entry))
         addedFiles = list(r.fixed[0] for r in 
                 parse.findall(addFileStr + "{}\n", entry))
         removedFiles = list(r.fixed[0] for r in 
                 parse.findall(removeFileStr + "{}\n", entry))
         self.logEntries.append(logEntry(timestamp, 
             user, note, tags, addedFiles, removedFiles))
Example #36
def importAxis(file, macroKeyword):
    axis = []
    with open(file) as f:
        for line in f:
            val = search(macroKeyword + '({:d})', line)
            if val:
                axis.append(val[0])
    assert len(axis), "No axis data found"
    return axis
Example #37
def cartodb2ogr(service_endpoint, aoi, out_fields, where='', _=''):
    global FUNCTION_COUNT
    FUNCTION_COUNT += 1
    # logging.info('FUNCTION cartodb2ogr STEP {} START'.format(FUNCTION_COUNT))
    t0 = time()

    endpoint_template = 'https://{}.carto.com/tables/{}/'
    username, table = search(endpoint_template, service_endpoint + '/')
    url = 'https://{username}.carto.com/api/v2/sql'.format(username=username)

    if isinstance(aoi, str):
        aoi = json.loads(aoi)

    params = {}
    fields = ['ST_AsGeoJSON(the_geom) as geometry']
    out_fields = out_fields.split(',')
    for field in out_fields:
        if field:
            fields.append('{field} as {field}'.format(field=field))

    temp = "ST_Intersects(ST_Buffer(ST_GeomFromText('{}',4326),0),the_geom)"
    features = []
    objectids = []
    for f in aoi['features']:
        where_clause = temp.format(
            wkt.dumps({
                'type': 'Polygon',
                'coordinates': bbox(f)
            }))
        if where and not where == '1=1':
            where_clause += 'AND {}'.format(where)

        q = 'SELECT {fields} FROM {table} WHERE {where}'
        params = {
            'q': q.format(fields=','.join(fields),
                          table=table,
                          where=where_clause)
        }

        try:
            req = requests.get(url, params=params)
            req.raise_for_status()
        except Exception as e:
            raise ValueError((e, url, bbox(f)))

        response = json.loads(req.text)['rows']
        features += [{
            'type': 'Feature',
            'geometry': json.loads(h['geometry']),
            'properties': {field: h[field]
                           for field in out_fields if field}
        } for h in response]

    featureset = json2ogr({'type': 'FeatureCollection', 'features': features})

    # logging.info('FUNCTION cartodb2ogr STEP {} DONE - {} SECONDS'.format(FUNCTION_COUNT, time()-t0))
    return featureset
Example #38
    def parse_phase(self, phase_repr):
        """
        Because the SUMO object Phase does not contain accessors,
        we parse the string representation to retrieve data members.
        :param phase_repr: The Phase string representation
        :return: An new Phase instance
        """
        duration = search('duration: {:f}', phase_repr)
        min_duration = search('minDuration: {:f}', phase_repr)
        max_duration = search('maxDuration: {:f}', phase_repr)
        phase_def = search('phaseDef: {}\n', phase_repr)

        if phase_def is None:
            phase_def = ''
        else:
            phase_def = phase_def[0]

        return Phase(duration[0], min_duration[0], max_duration[0], phase_def)
Example #39
def importAxis(file, macroKeyword):
    axis = []
    with open(file) as f:
        for line in f:
            val = search(macroKeyword + '({:d})', line)
            if val:
                axis.append(val[0])
    assert len(axis), "No axis data found"
    return axis
Example #40
    def test_compaction_throughput(self):
        """
        Test setting compaction throughput.
        Set throughput, insert data and ensure compaction performance corresponds.
        """
        cluster = self.cluster
        cluster.populate(1).start(wait_for_binary_proto=True)
        [node1] = cluster.nodelist()

        # disableautocompaction only disables compaction for existing tables,
        # so initialize stress tables with stress first
        stress_write(node1, keycount=1)
        node1.nodetool('disableautocompaction')

        stress_write(node1, keycount=200000 * cluster.data_dir_count)

        threshold = "5"
        node1.nodetool('setcompactionthroughput -- ' + threshold)

        node1.flush()
        if node1.get_cassandra_version() < '2.2':
            log_file = 'system.log'
        else:
            log_file = 'debug.log'
        mark = node1.mark_log(filename=log_file)
        node1.compact()
        matches = node1.watch_log_for('Compacted',
                                      from_mark=mark,
                                      filename=log_file)

        stringline = matches[0]

        throughput_pattern = '{}={avgthroughput:f}{units}/s'
        m = parse.search(throughput_pattern, stringline)
        avgthroughput = m.named['avgthroughput']
        found_units = m.named['units']

        unit_conversion_dct = {
            "MB": 1,
            "MiB": 1,
            "KiB": 1. / 1024,
            "GiB": 1024
        }

        units = ['MB'] if cluster.version() < LooseVersion('3.6') else [
            'KiB', 'MiB', 'GiB'
        ]
        assert found_units in units

        logger.debug(avgthroughput)
        avgthroughput_mb = unit_conversion_dct[found_units] * float(
            avgthroughput)

        # The throughput in the log is computed independently from the throttling and on the output files while
        # throttling is on the input files, so while that throughput shouldn't be higher than the one set in
        # principle, a bit of wiggle room is expected
        assert float(threshold) + 0.5 >= avgthroughput_mb
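A hedged illustration of the throughput pattern; the log line is fabricated and only loosely modelled on a Cassandra "Compacted ..." message. The anonymous {} soaks up everything before the "=", while the named fields supply the number and its unit.

import parse

line = "Compacted 4 sstables to [big-data1.db]. 2,000,000 bytes to 1,000,000 in 32ms, rate=31.250000MiB/s."
m = parse.search('{}={avgthroughput:f}{units}/s', line)
print(m.named['avgthroughput'], m.named['units'])  # 31.25 MiB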
Example #41
def solve(instance):
   
    result = instance.copy()
    strips_list = [map_doc_strips[doc] for doc in instance['docs']]
    strips = MixedStrips(strips_list, shuffle=True)
    prefix_dir = '{}/data/stripes/'.format(DOCREASSEMBLY_PATH_DIR)
    suffix_dir = '_{}'.format(len(strips.strips)) # to match the rule of the DocReassembly software
    
    # create a temporary directory to hold the reconstruction instance data
    with tempfile.TemporaryDirectory(prefix=prefix_dir, suffix=suffix_dir) as tmpdirname:
        # record the current directory and move to the DocReassembly root directory
        curr_dir = os.path.abspath('.')
        os.chdir(DOCREASSEMBLY_PATH_DIR)

        # case (instance) name is the basename of the directory without the final _<n>, where n is the
        # number of strips. DocReassembly will concatenate the path data/stripes/<case_name>
        # with the parameter <n>.
        case_name = os.path.basename(tmpdirname).replace(suffix_dir, '')
        
        # set the command to be executed (replace open parameters in the template string)
        cmd = CMD_TEMPLATE.format(case_name, len(strips.strips))
        cmd = cmd.split() # split command to put in the format of the subprocess system call format

        # copy strips' images into the temporary directory
        for i, strip in enumerate(strips.strips):
            cv2.imwrite('{}/{}.png'.format(tmpdirname, i), strip.image[: MAX_STRIP_HEIGHT, :, :: -1])
        
        # write the order file (ground-truth)
        order = len(strips.strips) * ['0'] # inverted init perm (which piece should be in each position?)
        for pos, element in enumerate(strips.init_perm):
            order[element] = str(pos)
        open('{}/order.txt'.format(tmpdirname), 'w').write('\n'.join(order))
        # while(1): pass
        
        # run the software
        with open(os.devnull, 'w') as devnull:
            output = str(subprocess.check_output(cmd))#, stderr=devnull))
        os.chdir(curr_dir) # return to the original directory
    
    sizes = instance['sizes']
    solution = [int(s) for s in search('Composed order: {} \\n', output).fixed[0].split()]
    result['opt_time'] = float(search('Computation time: {}s', output).fixed[0])
    result['accuracy'] = neighbor_comparison(solution, strips.init_perm, sizes)
    return result
Example #42
    def increase_sstable_generations(self, sstables):
        """
        After finding the number of existing sstables, increase all of the
        generations by that amount.
        """
        for table_or_index, table_sstables in sstables.items():
            increment_by = len(set(parse.search('{}-{increment_by}-{suffix}.{file_extention}', s).named['increment_by'] for s in table_sstables))
            sstables[table_or_index] = [self.increment_generation_by(s, increment_by) for s in table_sstables]

        debug('sstables after increment {}'.format(str(sstables)))
Example #43
 def __try_search(queries, string_to_search):
   result = None
   for query in queries:
     try:
       result = search(query, string_to_search)
     except ValueError:
        # This occurs if there's an apostrophe (') in the input string and parse
        # can't convert the captured text to an int
       pass
     if result is not None:
       return result
   return None
Example #44
    def test_compaction_throughput(self):
        """
        Test setting compaction throughput.
        Set throughput, insert data and ensure compaction performance corresponds.
        """
        cluster = self.cluster
        cluster.populate(1).start(wait_for_binary_proto=True)
        [node1] = cluster.nodelist()

        # disableautocompaction only disables compaction for existing tables,
        # so initialize stress tables with stress first
        stress_write(node1, keycount=1)
        node1.nodetool('disableautocompaction')

        stress_write(node1, keycount=200000 * cluster.data_dir_count)

        threshold = "5"
        node1.nodetool('setcompactionthroughput -- ' + threshold)

        node1.flush()
        if node1.get_cassandra_version() < '2.2':
            log_file = 'system.log'
        else:
            log_file = 'debug.log'
        mark = node1.mark_log(filename=log_file)
        node1.compact()
        matches = node1.watch_log_for('Compacted', from_mark=mark, filename=log_file)

        stringline = matches[0]

        throughput_pattern = '{}={avgthroughput:f}{units}/s'
        m = parse.search(throughput_pattern, stringline)
        avgthroughput = m.named['avgthroughput']
        found_units = m.named['units']

        unit_conversion_dct = {
            "MB": 1,
            "MiB": 1,
            "KiB": 1. / 1024,
            "GiB": 1024
        }

        units = ['MB'] if cluster.version() < LooseVersion('3.6') else ['KiB', 'MiB', 'GiB']
        assert found_units in units

        logger.debug(avgthroughput)
        avgthroughput_mb = unit_conversion_dct[found_units] * float(avgthroughput)

        # The throughput in the log is computed independently from the throttling and on the output files while
        # throttling is on the input files, so while that throughput shouldn't be higher than the one set in
        # principle, a bit of wiggle room is expected
        assert float(threshold) + 0.5 >= avgthroughput_mb
Example #45
 def _update_previous_area(self):
     """Update the area before the new one."""
     page = pywikibot.Page(pywikibot.Site(), self.after)
     old_text = page.get()
     link = u'Completing %s job unlocks the [[%s]] area.'
     if self.new_number in self.areas_list:
         job = parse.search(link % (u'{}', self.areas_list[self.new_number]),
                            old_text).fixed[0]
         link = link % (job, u'%s')
         text = old_text.replace(link % self.areas_list[self.new_number],
                                 link % self.area_name)
     else:
         final = u'It is currently the final area.'
         text = old_text.replace(final, link % (u'some', self.area_name))
     self._update_page(page, old_text, text)
Example #46
def get_all_characters(script_file_handle):
    """
    Given a file handle for a script file, return a list
    of all the characters.
    """
    characters_dict = {}
    script_file_handle.seek(0)
    for line in script_file_handle.readlines():
        line = line.strip()
        search_result = parse.search('{}:', line)
        if search_result:
            character = search_result.fixed[0]
            if character not in characters_dict:
                characters_dict[character] = True
    return characters_dict.keys()
Example #47
def prepare(context, series):
    """
    Extract all experiments of series and save their relevant data to csv.
    """

    v = ExperimentDocument.view('adaptor/experiment-series')
    l = v.all()

    flags_set = collect_flags(l)
    truthness_d = make_flags_truthness_dict(flags_set, l)

    ll = []
    for doc in l:
        r = parse.search(
            '-DNI={:d} -DNJ={:d}', doc.settings.build_settings.other_flags)

        # Get sorted list of flags
        d = truthness_d[doc._id]
        keys = sorted(d.keys())
        flags_list = [d[k] for k in keys]

        new_row = [
            doc._id, doc.datetime, doc.validation_result.measured_time, 
            doc.settings.program, doc.settings.build_settings.compiler, 
            doc.settings.build_settings.base_opt, 
            doc.settings.build_settings.optimization_flags, 
            r[0], r[1], 
            doc.hardware_info.cpu.cpu_name, doc.hardware_info.cpu.cpu_mhz, 
            doc.hardware_info.cpu.cache_size]
        new_row.extend(flags_list)

        ll.append(new_row)

    rr = map(lambda i: "\t".join(map(str, i)), ll)
    r = map(lambda i: i + '\n', rr)

    # Keys are from loop up there
    flags_headers_list = keys

    headers = 'id\tdatetime\ttime\tprogram_name\tcompiler\t'\
        'base_opt\toptimization_flags\twidth\theight\tcpu_name\t'\
        'cpu_mhz\tcpu_cache\t'
    full_headers = headers + '\t'.join(flags_headers_list) + '\n'

    f = open(os.path.join(context.paths_manager.framework_root_dir, 
        'an/{0}.csv'.format(series)), 'w')
    f.write(full_headers)
    f.writelines(r)
Example #48
 def _add_to_bosses_page(self):
     areas = len(self.areas_list)
     new_line = u'*[[File:%s|100px]] [[%s]] in the [[%s]] area' % (self._boss_image_name(),
                                                                   self.boss_name,
                                                                   self.area_name)
     page = pywikibot.Page(pywikibot.Site(), u'Bosses')
     text = old_text = page.get()
     intro = u'There are currently %s bosses in the game'
     bosses = parse.search(intro % (u'{:d}'), text).fixed[0]
     line_before = u' in the [[%s]] area' % self.after
     intro = intro % (u'%d')
     text = text.replace(intro % (bosses),
                         intro % ((bosses + 1)))
     text = text.replace(line_before,
                         line_before + u'\n' + new_line)
     self._update_page(page, old_text, text)
Example #49
    def _check_chunk_length(self, session, value):
        result = session.cluster.metadata.keyspaces['ks'].tables['test_table'].as_cql_query()
        # Now extract the param list
        params = ''

        if self.cluster.version() < '3.0':
            if 'sstable_compression' in result:
                params = result
        else:
            if 'compression' in result:
                params = result

        self.assertNotEqual(params, '', "Looking for the string 'sstable_compression', but could not find it in {str}".format(str=result))

        chunk_string = "chunk_length_kb" if self.cluster.version() < '3.0' else "chunk_length_in_kb"
        chunk_length = parse.search("'" + chunk_string + "': '{chunk_length:d}'", result).named['chunk_length']

        self.assertEqual(chunk_length, value, "Expected chunk_length: {}.  We got: {}".format(value, chunk_length))
Example #50
 def __check_proper_gah(string_to_search):
   # Check for the gah (gender / height / age) when formatted as: M/28/5'7"
   re_string = "((m|f|male|female)/\d+/\d+'\d+)"
   regex = re.compile(re_string, re.IGNORECASE)
   # print regex.match(submission.tti)
   match = regex.search(string_to_search)
   if match:
     rvalue = {}
     gah_str = match.group(0)
     # print gah_str
     # self.debug_str = gah_str
     result = search("{gender}/{age:d}/{feet:d}'{in:d}", gah_str)
     # print result.named
     rvalue['gender_is_female'] = RedditAnalyzer.__gender_from_string(result.named['gender'])
     rvalue['age'] = result.named['age']
     rvalue['height_in'] = result.named['feet'] * 12 + result.named['in']
     HITS_stats['check_proper_gah'] += 1
     return rvalue
   return None
Example #51
def scan_to_dict(scan):
    try:
        res = subprocess.check_output(["identify", scan.get_tiff_path()]).strip()
    except subprocess.CalledProcessError:
        return None
    image_width, image_length = search('TIFF {:d}x{:d}', res).fixed
    image_width = int(image_width)
    image_length = int(image_length)
#    if image_width != scan.image.width or image_length != scan.image.height:
#        print "Discrepancy in image sizes for scan %s" % scan.image.name
    tile_width = 256
    tile_length = 256

    return { 'document': scan.document_id,
             'path': scan.get_tiff_name(),
             'url': scan.image.url,
             'tiles': {"w": tile_width, "h": tile_length},
             'resolutions': res.count('] TIFF '),
             'size': { "w": image_width, "h": image_length } }
Example #52
    def call_token_generator(self, install_dir, randomPart, nodes):
        executable = os.path.join(install_dir, 'tools', 'bin', 'token-generator')
        if common.is_win():
            executable += ".bat"

        args = [executable]
        if randomPart is not None:
            if randomPart:
                args.append("--random")
            else:
                args.append("--murmur3")

        for n in nodes:
            args.append(str(n))

        debug('Invoking {}'.format(args))
        token_gen_output = subprocess.check_output(args)
        lines = token_gen_output.split("\n")
        dc_tokens = None
        generated_tokens = []
        for line in lines:
            if line.startswith("DC #"):
                if dc_tokens is not None:
                    self.assertGreater(dc_tokens.__len__(), 0, "dc_tokens is empty from token-generator {}".format(args))
                    generated_tokens.append(dc_tokens)
                dc_tokens = []
            else:
                if line:
                    m = parse.search('Node #{node_num:d}:{:s}{node_token:d}', line)
                    self.assertIsNotNone(m, "Line \"{}\" does not match pattern from token-generator {}".format(line, args))
                    node_num = int(m.named['node_num'])
                    node_token = int(m.named['node_token'])
                    dc_tokens.append(node_token)
                    self.assertEqual(node_num, dc_tokens.__len__(), "invalid token count from token-generator {}".format(args))
        self.assertIsNotNone(dc_tokens, "No tokens from token-generator {}".format(args))
        self.assertGreater(dc_tokens.__len__(), 0, "No tokens from token-generator {}".format(args))
        generated_tokens.append(dc_tokens)

        return generated_tokens
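A minimal sketch of the token line pattern; the line is invented (a small positive token rather than a realistic murmur3 value). The anonymous {:s} field just absorbs the run of whitespace after the colon.

import parse

line = "Node #1:  1808575600"
m = parse.search('Node #{node_num:d}:{:s}{node_token:d}', line)
print(m.named)  # {'node_num': 1, 'node_token': 1808575600}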
Example #53
 def get_value(self, val):
     """
     :param val: The property to get
     :param val: str
     :return: the answer
     :rtype: str
     """
     self.p.stdin.write(GET + " " + val + "\n")
     self.p.stdin.flush()
     self.log.info("Command {0} sent to radio".format(GET + " " + val))
     n_tries = 10
     while n_tries > 0:
         try:  # Try to parse
             line = self.read_stdout()
             ret = parse.search("={}\n", line)
             if ret is not None and len(ret.fixed) != 0:
                 self.log.debug("got " + ret.fixed[0] + " to get of " + val)
                 return ret.fixed[0]
         except Exception as err:
             self.log.debug("Attempts {0}".format(n_tries))
             time.sleep(0.1)
             n_tries -= 1
             continue
Example #54
 def get_person(self, person_url=None, person_id=None):
   """
   Load committee details for the given detail page URL or numeric ID
   """
   # Read either person_id or committee_url from the opposite
   if person_id is not None:
     person_url = self.urls['COMMITTEE_DETAIL_PRINT_PATTERN_FULL'] % person_id
   elif person_url is not None:
     parsed = parse.search(self.urls['COMMITTEE_DETAIL_PARSE_PATTERN_FULL'], person_url)
     person_id = parsed['person_id']
 
   logging.info("Getting meeting (committee) %d from %s", person_id, person_url)
   
   committee = Committee(numeric_id=person_id)
   
   time.sleep(self.config.WAIT_TIME)
   response = self.get_url(person_url)
   if not response:
     return
   
   # seek(0) is necessary to reset response pointer.
   response.seek(0)
   html = response.read()
   html = html.replace('&nbsp;', ' ')
   parser = etree.HTMLParser()
   dom = etree.parse(StringIO(html), parser)
   
   trs = dom.xpath(self.xpath['COMMITTEE_LINES'])
   for tr in trs:
     tds = tr.xpath('.//td')
     print tds
     if tr.get('class') == 'smcrowh':
       print tds[0].text
     else:
       for td in tds:
         print td[0].text
   return
Example #55
    def compaction_throughput_test(self):
        """
        Test setting compaction throughput.
        Set throughput, insert data and ensure compaction performance corresponds.
        """
        cluster = self.cluster
        cluster.populate(1).start(wait_for_binary_proto=True)
        [node1] = cluster.nodelist()

        # disableautocompaction only disables compaction for existing tables,
        # so initialize stress tables with stress first
        stress_write(node1, keycount=1)
        node1.nodetool('disableautocompaction')

        stress_write(node1, keycount=200000 * cluster.data_dir_count)

        threshold = "5"
        node1.nodetool('setcompactionthroughput -- ' + threshold)

        matches = block_on_compaction_log(node1)
        stringline = matches[0]

        throughput_pattern = '{}={avgthroughput:f}{units}/s'
        m = parse.search(throughput_pattern, stringline)
        avgthroughput = m.named['avgthroughput']
        found_units = m.named['units']

        units = ['MB'] if LooseVersion(cluster.version()) < LooseVersion('3.6') else ['KiB', 'MiB', 'GiB']
        self.assertIn(found_units, units)

        debug(avgthroughput)

        # The throughput in the log is computed independently from the throttling and on the output files while
        # throttling is on the input files, so while that throughput shouldn't be higher than the one set in
        # principle, a bit of wiggle room is expected
        self.assertGreaterEqual(float(threshold) + 0.5, float(avgthroughput))
Example #56
    def get_submission(self, submission_url=None, submission_id=None):
        """
        Load submission (Vorlage) details for the submission given by detail page URL
        or numeric ID
        """
        # Read either submission_id or submission_url from the opposite
        if submission_id is not None:
            submission_url = self.urls['SUBMISSION_DETAIL_PRINT_PATTERN'] % submission_id
        elif submission_url is not None:
            parsed = parse.search(self.urls['SUBMISSION_DETAIL_PARSE_PATTERN'], submission_url)
            submission_id = parsed['submission_id']

        logging.info("Getting submission %d from %s", submission_id, submission_url)

        submission = Submission(numeric_id=submission_id)

        time.sleep(self.config.WAIT_TIME)
        try:
            response = self.user_agent.open(submission_url)
        except urllib2.HTTPError, e:
            if e.code == 404:
                sys.stderr.write("URL not found (HTTP 404) error caught: %s\n" % submission_url)
                sys.stderr.write("Please check BASE_URL in your configuration.\n")
                sys.exit(1)
Example #57
 except:
     continue
 if tdcontent == 'Name:':
     submission.identifier = tds[n + 1].text.strip()
 elif tdcontent == 'Art:':
     submission.type = tds[n + 1].text.strip()
 elif tdcontent == 'Datum:':
     submission.date = tds[n + 1].text.strip()
 elif tdcontent == 'Name:':
     submission.identifier = tds[n + 1].text.strip()
 elif tdcontent == 'Betreff:':
     submission.subject = '; '.join(tds[n + 1].xpath('./text()'))
 elif tdcontent == 'Referenzvorlage:':
     link = tds[n + 1].xpath('a')[0]
     href = link.get('href')
     parsed = parse.search(self.urls['SUBMISSION_DETAIL_PARSE_PATTERN'], href)
     submission.superordinate = {
         'identifier': link.text.strip(),
         'numeric_id': parsed['submission_id']
     }
     # add superordinate submission to queue
     if hasattr(self, 'submission_queue'):
         self.submission_queue.add(parsed['submission_id'])
 # subordinate submissions are added to the queue
 elif tdcontent == 'Untergeordnete Vorlage(n):':
     current_category = 'subordinates'
     for link in tds[n + 1].xpath('a'):
         href = link.get('href')
         parsed = parse.search(self.urls['SUBMISSION_DETAIL_PARSE_PATTERN'], href)
         if hasattr(self, 'submission_queue') and parsed is not None:
             #add subordinate submission to queue
Example #58
    def test_compactionstats(self):
        """
        @jira_ticket CASSANDRA-10504
        @jira_ticket CASSANDRA-10427

        Test that jmx MBean used by nodetool compactionstats
        properly updates the progress of a compaction
        """

        cluster = self.cluster
        cluster.populate(1)
        node = cluster.nodelist()[0]
        remove_perf_disable_shared_mem(node)
        cluster.start(wait_for_binary_proto=True)

        # Run a quick stress command to create the keyspace and table
        node.stress(['write', 'n=1', 'no-warmup'])
        # Disable compaction on the table
        node.nodetool('disableautocompaction keyspace1 standard1')
        node.nodetool('setcompactionthroughput 1')
        node.stress(['write', 'n=150K', 'no-warmup'])
        node.flush()
        # Run a major compaction. This will be the compaction whose
        # progress we track.
        node.nodetool_process('compact')
        # We need to sleep here to give compaction time to start
        # Why not do something smarter? Because if the bug regresses,
        # we can't rely on jmx to tell us that compaction started.
        time.sleep(5)

        compaction_manager = make_mbean('db', type='CompactionManager')
        with JolokiaAgent(node) as jmx:
            progress_string = jmx.read_attribute(compaction_manager, 'CompactionSummary')[0]

            # Pause in between reads
            # to allow compaction to move forward
            time.sleep(2)

            updated_progress_string = jmx.read_attribute(compaction_manager, 'CompactionSummary')[0]
            var = 'Compaction@{uuid}(keyspace1, standard1, {progress}/{total})bytes'
            progress = int(parse.search(var, progress_string).named['progress'])
            updated_progress = int(parse.search(var, updated_progress_string).named['progress'])

            logger.debug(progress_string)
            logger.debug(updated_progress_string)

            # We want to make sure that the progress is increasing,
            # and that values other than zero are displayed.
            assert updated_progress > progress
            assert progress >= 0
            assert updated_progress > 0

            # Block until the major compaction is complete
            # Otherwise nodetool will throw an exception
            # Give a timeout, in case compaction is broken
            # and never ends.
            start = time.time()
            max_query_timeout = 600
            logger.debug("Waiting for compaction to finish:")
            while (len(jmx.read_attribute(compaction_manager, 'CompactionSummary')) > 0) and (
                    time.time() - start < max_query_timeout):
                logger.debug(jmx.read_attribute(compaction_manager, 'CompactionSummary'))
                time.sleep(2)
Example #59
    def get_session(self, session_url=None, session_id=None):
        """
        Load session details for the given detail page URL or numeric ID
        """
        # Read either session_id or session_url from the opposite
        if session_id is not None:
            session_url = self.urls['SESSION_DETAIL_PRINT_PATTERN'] % session_id
        elif session_url is not None:
            parsed = parse.search(self.urls['SESSION_DETAIL_PARSE_PATTERN'], session_url)
            session_id = parsed['session_id']

        logging.info("Getting session %d from %s", session_id, session_url)

        session = Session(numeric_id=session_id)

        time.sleep(self.config.WAIT_TIME)
        response = self.user_agent.open(session_url)
        # forms for later attachment download
        mechanize_forms = mechanize.ParseResponse(response, backwards_compat=False)
        # seek(0) is necessary to reset response pointer.
        response.seek(0)
        html = response.read()
        html = html.replace('&nbsp;', ' ')
        parser = etree.HTMLParser()
        dom = etree.parse(StringIO(html), parser)

        # check for page errors
        try:
            page_title = dom.xpath('//h1')[0].text
            if 'Fehlermeldung' in page_title:
                logging.info("Page %s cannot be accessed due to server error", session_url)
                if self.options.verbose:
                    print "Page %s cannot be accessed due to server error" % session_url
                return
            if 'Berechtigungsfehler' in page_title:
                logging.info("Page %s cannot be accessed due to permissions", session_url)
                if self.options.verbose:
                    print "Page %s cannot be accessed due to permissions" % session_url
                return
        except:
            pass
        try:
            error_h3 = dom.xpath('//h3[@class="smc_h3"]')[0].text.strip()
            if 'Keine Daten gefunden' in error_h3:
                logging.info("Page %s does not contain any agenda items", session_url)
                if self.options.verbose:
                    print "Page %s does not contain agenda items" % session_url
                return
        except:
            pass

        session.original_url = session_url

        # Session title
        try:
            session.title = dom.xpath(self.xpath['SESSION_DETAIL_TITLE'])[0].text
        except:
            logging.critical('Cannot find session title element using XPath SESSION_DETAIL_TITLE')
            raise TemplateError('Cannot find session title element using XPath SESSION_DETAIL_TITLE')

        # Committee link
        try:
            links = dom.xpath(self.xpath['SESSION_DETAIL_COMMITTEE_LINK'])
            for link in links:
                href = link.get('href')
                parsed = parse.search(self.urls['COMMITTEE_DETAIL_PARSE_PATTERN'], href)
                if parsed is not None:
                    session.committee_id = parsed['committee_id']
        except:
            logging.critical('Cannot find link to committee detail page using XPath SESSION_DETAIL_COMMITTEE_LINK')
            raise TemplateError('Cannot find link to committee detail page using XPath SESSION_DETAIL_COMMITTEE_LINK')

        # Session identifier, date, address etc
        tds = dom.xpath(self.xpath['SESSION_DETAIL_IDENTIFIER_TD'])
        if len(tds) == 0:
            logging.critical('Cannot find table fields using XPath SESSION_DETAIL_IDENTIFIER_TD')
            raise TemplateError('Cannot find table fields using XPath SESSION_DETAIL_IDENTIFIER_TD')
        else:
            for n in range(0, len(tds)):
                try:
                    tdcontent = tds[n].text.strip()
                    nextcontent = tds[n + 1].text.strip()
                except:
                    continue
                if tdcontent == 'Sitzung:':
                    session.identifier = nextcontent
                elif tdcontent == 'Gremium:':
                    session.committee_name = nextcontent
                elif tdcontent == 'Datum:':
                    datestring = nextcontent
                    if len(tds) > n + 2 and tds[n + 2].text == 'Zeit:':
                        if len(tds) > n + 3 and tds[n + 3].text is not None:
                            datestring += ' ' + tds[n + 3].text
                    session.date_start = datestring
                elif tdcontent == 'Raum:':
                    session.address = " ".join(tds[n + 1].xpath('./text()'))
                elif tdcontent == 'Bezeichnung:':
                    session.description = nextcontent
            if not hasattr(session, 'identifier'):
                logging.critical('Cannot find session identifier using XPath SESSION_DETAIL_IDENTIFIER_TD')
                raise TemplateError('Cannot find session identifier using XPath SESSION_DETAIL_IDENTIFIER_TD')

        # Agendaitems
        found_attachments = []
        rows = dom.xpath(self.xpath['SESSION_DETAIL_AGENDA_ROWS'])
        if len(rows) == 0:
            logging.critical('Cannot find agenda using XPath SESSION_DETAIL_AGENDA_ROWS')
            raise TemplateError('Cannot find agenda using XPath SESSION_DETAIL_AGENDA_ROWS')
        else:
            agendaitems = {}
            agendaitem_id = None
            public = True
            for row in rows:
                row_id = row.get('id')
                row_classes = row.get('class').split(' ')
                fields = row.xpath('td')
                number = fields[0].xpath('./text()')
                number = number[0] if len(number) > 0 else None
                #print "number: %s" % number
                if row_id is not None:
                    # Agendaitem main row
                    agendaitem_id = row_id.rsplit('_', 1)[1]
                    agendaitems[agendaitem_id] = {}
                    agendaitems[agendaitem_id]['id'] = int(agendaitem_id)
                    if number is not None:
                        agendaitems[agendaitem_id]['number'] = number
                    agendaitems[agendaitem_id]['subject'] = "; ".join(fields[1].xpath('./text()'))
                    agendaitems[agendaitem_id]['public'] = public
                    # submission links
                    links = row.xpath(self.xpath['SESSION_DETAIL_AGENDA_ROWS_SUBMISSION_LINK'])
                    submissions = []
                    for link in links:
                        href = link.get('href')
                        if href is None:
                            continue
                        parsed = parse.search(self.urls['SUBMISSION_DETAIL_PARSE_PATTERN'], href)
                        if parsed is not None:
                            submission = Submission(numeric_id=int(parsed['submission_id']),
                                                    identifier=link.text)
                            submissions.append(submission)
                            # Add submission to submission queue
                            if hasattr(self, 'submission_queue'):
                                self.submission_queue.add(int(parsed['submission_id']))
                    if len(submissions):
                        agendaitems[agendaitem_id]['submissions'] = submissions
                    """
                    Note: we don't scrape agendaitem-related attachments for now,
                    based on the assumption that they are all found via submission
                    detail pages. All we do here is get a list of attachment IDs
                    in found_attachments
                    """
                    #attachments = []
                    forms = row.xpath('.//form')
                    for form in forms:
                        for hidden_field in form.xpath('input'):
                            if hidden_field.get('name') != 'DT':
                                continue
                            attachment_id = hidden_field.get('value')
                            #attachments.append(attachment_id)
                            found_attachments.append(attachment_id)
                    #if len(attachments):
                    #    agendaitems[agendaitem_id]['attachments'] = attachments

                elif 'smc_tophz' in row_classes:
                    # additional (optional row for agendaitem)
                    label = fields[1].text
                    value = fields[2].text
                    if label is not None and value is not None:
                        label = label.strip()
                        value = value.strip()
                        #print (label, value)
                        if label in ['Ergebnis:', 'Beschluss:']:
                            if value in self.config.RESULT_STRINGS:
                                agendaitems[agendaitem_id]['result'] = self.config.RESULT_STRINGS[value]
                            else:
                                logging.warn("String '%s' not found in configured RESULT_STRINGS", value)
                                if self.options.verbose:
                                    print "WARNING: String '%s' not found in RESULT_STRINGS\n" % value
                                agendaitems[agendaitem_id]['result'] = value
                        elif label == 'Bemerkung:':
                            agendaitems[agendaitem_id]['result_note'] = value
                        elif label == 'Abstimmung:':
                            agendaitems[agendaitem_id]['voting'] = value
                        else:
                            logging.critical("Agendaitem info label '%s' is unknown", label)
                            raise ValueError('Agendaitem info label "%s" is unknown' % label)

                elif 'smcrowh' in row_classes:
                    # Subheading (public / nonpublic part)
                    if fields[0].text is not None and "Nicht öffentlich" in fields[0].text.encode('utf-8'):
                        public = False
            #print json.dumps(agendaitems, indent=2)
            session.agendaitems = agendaitems.values()

        # session-related attachments
        containers = dom.xpath(self.xpath['SESSION_DETAIL_ATTACHMENTS'])
        for container in containers:
            classes = container.get('class')
            if classes is None:
                continue
            classes = classes.split(' ')
            if self.xpath['SESSION_DETAIL_ATTACHMENTS_CONTAINER_CLASSNAME'] not in classes:
                continue
            attachments = []
            rows = container.xpath('.//tr')
            for row in rows:
                forms = row.xpath('.//form')
                for form in forms:
                    #print "Form: ", form
                    name = " ".join(row.xpath('./td/text()')).strip()
                    for hidden_field in form.xpath('input'):
                        if hidden_field.get('name') != 'DT':
                            continue
                        attachment_id = hidden_field.get('value')
                        # make sure to add only those which aren't agendaitem-related
                        if attachment_id not in found_attachments:
                            attachment = Attachment(
                                identifier=attachment_id,
                                name=name
                            )
                            # Traversing the whole mechanize response to submit this form
                            for mform in mechanize_forms:
                                #print "Form found: '%s'" % mform
                                for control in mform.controls:
                                    if control.name == 'DT' and control.value == attachment_id:
                                        #print "Found matching form: ", control.name, control.value
                                        attachment = self.get_attachment_file(attachment, mform)
                            attachments.append(attachment)
                            found_attachments.append(attachment_id)
            if len(attachments):
                session.attachments = attachments

        oid = self.db.save_session(session)
        if self.options.verbose:
            logging.info("Session %d stored with _id %s", session_id, oid)
Ejemplo n.º 60
    def test_pos(self):
        # basic search() test
        r = parse.search("a {} c", " a b c ", 2)
        self.assertEqual(r, None)
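
For context on this last example (an addition, not part of the original test file): the third positional argument of parse.search is a start offset into the string being searched. In " a b c " the only occurrence of the pattern begins at index 1, so starting the search at index 2 skips past it and None is returned. A minimal sketch:

import parse

text = ' a b c '
print(parse.search('a {} c', text))     # <Result ('b',) {}> -- match starts at index 1
print(parse.search('a {} c', text, 2))  # None -- the search window begins after it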