Example 1
import bisect
import json
import time
from datetime import datetime

import common


def run(input_file, output_file, raw, long, lat, time_interval, period,
        area_limit):
    PERIOD_IN_SECONDS = period * 24 * 60 * 60
    TIME_INTERVAL = 60 * 60 * time_interval

    lines = input_file.readlines()
    records = []

    # Expected longitude drift after `period` days, apparently assuming a
    # rotation rate of 180/14 degrees per day (a full turn in ~28 days);
    # the trailing -360 wraps the result back into range.
    def get_longtitude_after_period(long):
        return long + 180 * period / 14 - 360

    def timestamp_from_date(date_s):
        return time.mktime(
            datetime.strptime(date_s, '%Y-%m-%d %H:%M:%S').timetuple())

    for i in range(len(lines)):
        record = common.Record()
        record.__dict__.update(json.loads(lines[i]))
        try:
            record.time = timestamp_from_date(record.time)
        except (TypeError, ValueError):
            # skip records whose timestamp cannot be parsed
            continue
        records.append(record)

    records.sort(key=lambda record: record.time)
    times = [x.time for x in records]
    sunspots = common.getSunspotsFromRecords(records)

    def is_same(first_record, second_record):
        long_diff = abs(
            get_longtitude_after_period(first_record.longtitude) -
            second_record.longtitude)
        lat_diff = abs(first_record.latitude - second_record.latitude)
        return long_diff < long and lat_diff < lat and \
               (long_diff / long) ** 2 + (lat_diff / lat) ** 2 <= 1

    spots_to_check = [
        sunspot for sunspot in sunspots if sunspot.records[-1].longtitude >=
        (90 - 180 / 14) and sunspot.area >= area_limit
    ]

    for spot in spots_to_check:
        record = spot.records[-1]
        left = bisect.bisect_left(
            times, record.time + PERIOD_IN_SECONDS - TIME_INTERVAL)
        right = bisect.bisect_left(
            times, record.time + PERIOD_IN_SECONDS + TIME_INTERVAL)
        for record2 in records[left:right]:
            if not record2.old and is_same(record, record2):
                if raw:
                    output_file.write(
                        str(record.group_id) + ' ' + str(record2.group_id) +
                        '\n')
                record2.old = True
                record2.previous_id = record.group_id
                break

    if not raw:
        for record in records:
            output_file.write(json.dumps(record.__dict__) + "\n")
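
A minimal usage sketch for the run above (the output file name and the
threshold values here are hypothetical, not taken from the original
project):

with open("data/rgofull.json") as inp, open("matches.txt", "w") as outp:
    run(inp, outp, raw=True, long=5.0, lat=2.0,
        time_interval=12, period=10, area_limit=100)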
Example 2
    # Python 2 code. `debug` is assumed to be a module-level flag in the
    # original project; row.keys() implies a sqlite3.Row row factory.
    def get_hash(self, kw, random_=0, limit_=None, sample_=None):
        cur = self.con.cursor()
        cmd = "select * from " + self.tname + " where id>=0"
        values = []
        for k, v in kw.items():
            cmd += " and %s=?" % k
            conv = self.converters.get(k, None)
            if conv is not None: v = conv.pickle(v)
            values += [v]
        if sample_:
            cmd += " and id%%%d=abs(random())%%%d" % (sample_, sample_)
        if random_:
            cmd += " order by random()"
        if limit_ is not None:
            cmd += " limit %d" % limit_
        if self.verbose: print "#", cmd, values
        if debug: print cmd
        for row in cur.execute(cmd, values):
            result = common.Record()
            for k in row.keys():
                conv = self.converters.get(k, None)
                v = row[k]
                if conv is not None:
                    v = conv.unpickle(v)
                setattr(result, k, v)
            yield result
        cur.close()
        del cur
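
A hedged usage sketch for get_hash, assuming it is a method of a small
sqlite3-backed table wrapper (the Table class and its constructor here are
hypothetical):

table = Table("spots.db", "records")  # exposes con, tname, converters, verbose
for rec in table.get_hash({"group_id": 12345}, random_=1, limit_=10):
    print(rec.group_id)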
Example 3
import json
import random

import common


def run(input_file, output_file, raw, angle_step, area_step_low,
        area_limit_low, area_step_high, area_limit_high):
    AREA_STEP = area_step_high
    AREA_LIMIT = area_limit_high

    def getFrame(arr, angle_limit, area_limit):
        return [
            x for x in arr if not x.filtered and not x.old and
            (abs(x.records[0].longtitude) >= angle_limit - angle_step) and (
                abs(x.records[0].longtitude) < angle_limit) and (
                    x.area >= area_limit - AREA_STEP) and (x.area < area_limit)
        ]

    lines = input_file.readlines()
    records = []

    for i in range(len(lines)):
        record = common.Record()
        record.__dict__.update(json.loads(lines[i]))
        records.append(record)

    sunspots = common.getSunspotsFromRecords(records)

    random.shuffle(sunspots)

    left = [x for x in sunspots if x.records[0].longtitude <= 0]
    right = [x for x in sunspots if x.records[0].longtitude > 0]

    def apply_filter():
        for j in range(1, AREA_LIMIT // AREA_STEP + 1):
            area_limit = AREA_STEP * j
            for i in range(1, 90 // angle_step + 1):
                angle_limit = i * angle_step
                count = max(
                    0,
                    len(getFrame(left, angle_limit, area_limit)) -
                    len(getFrame(right, angle_limit, area_limit)))
                for x in getFrame(left, angle_limit, area_limit)[:count]:
                    x.filtered = True

    apply_filter()
    AREA_STEP = area_step_low
    AREA_LIMIT = area_limit_low
    apply_filter()

    for x in left:
        if not x.filtered:
            if raw:
                output_file.write(str(x.id) + "\n")
            else:
                for y in x.records:
                    output_file.write(json.dumps(y.__dict__) + "\n")

    for x in right:
        if raw:
            output_file.write(str(x.id) + "\n")
        else:
            for y in x.records:
                output_file.write(json.dumps(y.__dict__) + "\n")
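
The apply_filter above equalizes the two hemispheres bin by bin: in every
(angle, area) frame it marks the surplus of left-hemisphere spots as
filtered. A toy sketch of that balancing step (SimpleNamespace stands in
for common.Record sunspots):

from types import SimpleNamespace

left = [SimpleNamespace(filtered=False) for _ in range(5)]
right = [SimpleNamespace(filtered=False) for _ in range(3)]

# mark the left-hemisphere surplus so both bins end up the same size
surplus = max(0, len(left) - len(right))
for spot in left[:surplus]:
    spot.filtered = True

print(sum(not s.filtered for s in left))  # 3, matching len(right)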
Example 4
import json

import common

# formatDate is assumed to be a helper defined elsewhere in this project.


def run(input_file, output_file):
    for line in input_file.readlines():
        if len(line) < 74: continue

        record = common.Record()

        record.group_id = line[12:20]

        try:
            record.group_id = int(record.group_id)
        except ValueError:
            # skip lines without a numeric group id
            continue

        record.latitude = float(line[63:68])
        record.longtitude = float(line[57:62])
        record.time = formatDate(line[:4], line[4:6], line[6:8], line[9:12])
        record.area = int(line[40:44])

        output_file.write(json.dumps(record.__dict__) + '\n')
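
A sketch of the fixed-width layout those slices imply, using a synthetic
line (the column positions come from the code above; the sample values are
made up):

line = list(" " * 74)
line[0:8] = "19570310"    # year, month, day
line[9:12] = "500"        # time-of-day field passed to formatDate
line[12:20] = "   12345"  # group id
line[40:44] = " 250"      # area
line[57:62] = " 12.5"     # longitude
line[63:68] = "-20.1"     # latitude
line = "".join(line)

print(int(line[12:20]), int(line[40:44]),
      float(line[57:62]), float(line[63:68]))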
Example 5
import json

import astropy.units as u
from astropy.coordinates import SkyCoord
from sunpy.coordinates import frames

import common


def run(input_file, output_file):
    lines = input_file.readlines()

    progress = 0

    for i in range(len(lines)):
        record = common.Record()
        record.__dict__.update(json.loads(lines[i]))

        coord = SkyCoord(
            lat=record.latitude * u.deg,
            lon=record.longtitude * u.deg,
            obstime=record.time,
            frame=frames.HeliographicCarrington)

        record.longtitude = coord.transform_to(frames.HeliographicStonyhurst).lon.deg

        # progress is tracked in tenths of a percent (hence the / 10 below)
        tenths_completed = i * 1000 // len(lines)
        if tenths_completed != progress:
            progress = tenths_completed
            print("Processing: " + str(progress / 10) + "%")

        output_file.write(json.dumps(record.__dict__) + "\n")
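
A minimal standalone version of the same Carrington-to-Stonyhurst
conversion (recent sunpy releases may also require an observer on the
Carrington frame, which the snippet above omits):

import astropy.units as u
from astropy.coordinates import SkyCoord
from sunpy.coordinates import frames

c = SkyCoord(lon=120 * u.deg, lat=15 * u.deg,
             obstime="1957-03-10 00:00:00",
             observer="earth",
             frame=frames.HeliographicCarrington)
print(c.transform_to(frames.HeliographicStonyhurst).lon.deg)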
Example 6

import json
from datetime import datetime

import matplotlib.pyplot as plt

import common

# to_average is assumed to be a smoothing helper defined elsewhere in this
# project.

input_file2 = open("data/data.csv", "r")
numberByDate = dict()
for line in input_file2.readlines():
    (year, month, day, skip, number) = line.split(";")[:5]
    if int(year) > 2013 or int(year) < 1900:
        continue
    numberByDate[year + "-" + month + "-" + day] = (int(number), 0, 0)
plt.plot(to_average([i[0] for i in numberByDate.values()]),
         label='Daily total sunspot number')

input_file = open("data/rgofull.marked.json", "r")
records = []
for line in input_file.readlines():
    records.append(common.Record())
    records[-1].__dict__.update(json.loads(line))
sunspots = common.getSunspotsFromRecords(records)
for sunspot in sunspots:
    if not sunspot.old:
        continue
    for record in sunspot.records:
        date = datetime.utcfromtimestamp(record.time).strftime('%Y-%m-%d')
        if record.latitude >= 0:
            numberByDate[date] = (numberByDate[date][0],
                                  numberByDate[date][1] + record.area,
                                  numberByDate[date][2])
        else:
            numberByDate[date] = (numberByDate[date][0], numberByDate[date][1],
                                  numberByDate[date][2] + record.area)
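
One possible continuation, plotting the hemispheric areas accumulated
above alongside the sunspot number (an assumption, not part of the
original script):

plt.plot(to_average([v[1] for v in numberByDate.values()]),
         label='Northern area of old-marked spots')
plt.plot(to_average([v[2] for v in numberByDate.values()]),
         label='Southern area of old-marked spots')
plt.legend()
plt.show()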
Example 7
    def recognizeLineSeg(self, image):
        """Recognize a line.
        lattice: result of recognition
        rseg: intarray where the raw segmentation will be put
        image: line image to be recognized"""

        # first check whether the input dimensions are reasonable

        if image.shape[0] < 10:
            raise RecognitionError(
                "line image not high enough (maybe rescale?)", image=image)
        if image.shape[0] > 200:
            raise RecognitionError("line image too high (maybe rescale?)",
                                   image=image)
        if image.shape[1] < 10:
            raise RecognitionError(
                "line image not wide enough (segmentation error?)",
                image=image)
        if image.shape[1] > 10000:
            raise RecognitionError("line image too wide???", image=image)

        # FIXME for some reason, something down below
        # depends on this being a bytearray image, so
        # we're normalizing it here to that type
        image = array(image * 255.0 / amax(image), 'B')

        # compute the raw segmentation
        rseg = self.segmenter.charseg(image)
        if self.debug: show_segmentation(rseg)  # FIXME
        rseg = renumber_labels(rseg, 1)  # FIXME
        if amax(rseg) < self.minsegs:
            raise RecognitionError("not enough segments in raw segmentation",
                                   rseg=rseg)
        # self.grouper = grouper.Grouper()
        self.grouper.setSegmentation(rseg)

        # compute the geometry (might have to use
        # CCS segmenter if this doesn't work well)
        geo = docproc.seg_geometry(rseg)

        # compute the median segment height
        heights = []
        for i in range(self.grouper.length()):
            (y0, x0, y1, x1) = self.grouper.boundingBox(i)
            heights.append(y1 - y0)
        mheight = median(array(heights))
        if mheight < 8:
            raise RecognitionError(
                "median line height too small (maybe rescale prior to recognition)",
                mheight=mheight)
        if mheight > 100:
            raise RecognitionError(
                "median line height too large (maybe rescale prior to recognition)",
                mheight=mheight)
        self.mheight = mheight

        # invert the input image (make a copy first)
        old = image
        image = amax(image) - image

        # initialize the whitespace estimator
        self.whitespace.setLine(image, rseg)

        # this holds the list of recognized characters if keep!=0
        self.chars = []

        # now iterate through the characters
        for i in range(self.grouper.length()):
            # get the bounding box for the character (used later)
            (y0, x0, y1, x1) = self.grouper.boundingBox(i)

            # compute relative geometry
            aspect = (y1 - y0) * 1.0 / (x1 - x0)
            try:
                rel = docproc.rel_char_geom((y0, y1, x0, x1), geo)
            except:
                traceback.print_exc()
                raise RecognitionError("bad line geometry", geo=geo)
            ry, rw, rh = rel
            assert rw > 0 and rh > 0, "error: rw=%g rh=%g" % (rw, rh)
            rel = docproc.rel_geo_normalize(rel)

            # extract the character image (and optionally display it)
            (raw, mask) = self.grouper.extractWithMask(image, i, 1)
            char = raw / 255.0
            if self.debug:
                imshow(char)
                raw_input()

            # Add a skip transition with the pixel width as cost.
            # This ensures that the lattice is at least connected.
            # Note that for typical character widths, this is going
            # to be much larger than any per-character cost.
            if self.add_rho:
                self.grouper.setClass(i, ocrofst.L_RHO,
                                      self.rho_scale * raw.shape[1])

            # compute the classifier output for this character
            # FIXME parallelize this
            outputs = self.cmodel.coutputs(char, geometry=rel)
            outputs = [(x[0], -log(x[1])) for x in outputs]
            self.chars.append(
                common.Record(index=i, image=char, outputs=outputs))

            # estimate the space cost
            sc = self.whitespace.classifySpace(x1)
            yes_space = min(self.maxspacecost, -log(sc[1]))
            no_space = min(self.maxspacecost, -log(sc[0]))

            # maybe add a transition on "_" that we can use to skip
            # this character if the transcription contains a "~"
            self.grouper.setClass(i, "~", self.reject_cost)

            # add the top classes to the lattice
            outputs.sort(key=lambda x: x[1])
            for cls, cost in outputs[:self.nbest]:
                # don't add anything with a cost above maxcost
                # if cost>self.maxcost and cls!="~": continue
                if cls == "~": continue
                if cls in self.debug_cls:
                    print "debug",self.grouper.start(i),self.grouper.end(i),"cls",cls,"cost",cost,\
                        "y %.2f w %.2f h %.2f"%(rel[0],rel[1],rel[2])

                # letters are never small, so we skip small bounding boxes that
                # are categorized as letters; this is an ugly special case, but
                # it is quite common
                category = unicodedata.category(unicode(cls[0]))
                if (y1 -
                        y0) < self.min_height * mheight and category[0] == "L":
                    # add an empty transition to allow skipping junk
                    # (commented out right now because I'm not sure whether
                    # the grouper can handle it; FIXME)
                    # self.grouper.setClass(i,"",1.0)
                    continue

                if type(cls) == int:
                    assert self.allow_any or (cls>=0 and cls<0x110000),\
                        "classifier returned non-unicode class: %s"%(hex(cls),)
                elif type(cls) == str:
                    assert len(cls)<4,\
                        ("classifier returned too many chars: %s",cls)
                # for anything else, just add the classified character to the grouper
                if type(cls) == str or type(cls) == unicode:
                    self.grouper.setClass(i, cls, cost)
                elif type(cls) == int:
                    assert cls >= 0 and cls < 0x110000, "bad class: %s" % (
                        hex(cls), )
                    self.grouper.setClass(i, cls, cost)
                else:
                    raise Exception("bad class type: %s" % type(cls))
                self.grouper.setSpaceCost(i, float(yes_space), float(no_space))

        # extract the recognition lattice from the grouper
        lattice = self.grouper.getLattice()

        # return the raw segmentation as a result
        return lattice, rseg
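
The -log(p) conversion above turns classifier probabilities into additive
path costs; a tiny illustration:

from math import log

probs = [("a", 0.7), ("o", 0.2), ("~", 0.1)]
costs = [(cls, -log(p)) for cls, p in probs]
# lower cost means more likely; costs of consecutive characters add up
# along a lattice path, so the best path minimizes the total cost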
Example 8
import bisect
import json
import math
import time
from datetime import datetime

import matplotlib.pyplot as plt

import common


def run(input_file, long, lat, time_interval):
    TIME_INTERVAL = 60 * 60 * time_interval

    # Same 180/14 deg-per-day drift model as in Example 1, but without the
    # -360 wrap.
    def get_longtitude_after_period(long):
        return long + 180 * PERIOD_IN_DAYS / 14

    def timestamp_from_date(date_s):
        return time.mktime(datetime.strptime(date_s, '%Y-%m-%d %H:%M:%S').timetuple())

    lines = input_file.readlines()
    records = []

    for i in range(len(lines)):
        record = common.Record()
        record.__dict__.update(json.loads(lines[i]))
        try:
            record.time = timestamp_from_date(record.time)
        except (TypeError, ValueError):
            # skip records whose timestamp cannot be parsed
            continue
        records.append(record)

    records.sort(key=lambda record: record.time)
    times = [x.time for x in records]
    sunspots = common.getSunspotsFromRecords(records)

    def mark_and_get_accuracy(long_interval, lat_interval):
        def is_same(first_record, second_record):
            long_diff = abs(get_longtitude_after_period(first_record.longtitude) - second_record.longtitude)
            lat_diff = abs(first_record.latitude - second_record.latitude)
            return long_diff < long_interval and lat_diff < lat_interval and \
                   (long_diff / long_interval)**2 + (lat_diff / lat_interval)**2 <= 1

        def get_score(first_record, second_record):
            r = math.sqrt(second_record.area) / 2
            lat_l = max(second_record.latitude - r, first_record.latitude - lat_interval)
            lat_r = min(second_record.latitude + r, first_record.latitude + lat_interval)
            long_l = max(second_record.longtitude - r, get_longtitude_after_period(first_record.longtitude) - long_interval)
            long_r = min(second_record.longtitude + r, get_longtitude_after_period(first_record.longtitude) + long_interval)
            return max(0, (lat_r - lat_l)) * max(0, (long_r - long_l))

        for record in records:
            record.marked = False

        long_live_sunspots = [sunspot for sunspot in sunspots if sunspot.records[-1].time - sunspot.records[0].time >= PERIOD_IN_SECONDS]

        score = 0
        count = 0
        for spot in long_live_sunspots:
            record = spot.records[0]
            left = bisect.bisect_left(times, record.time + PERIOD_IN_SECONDS - TIME_INTERVAL)
            right = bisect.bisect_left(times, record.time + PERIOD_IN_SECONDS + TIME_INTERVAL)
            for record2 in records[left:right]:
                if not record2.marked and is_same(record, record2):
                    record2.marked = True
                    score += get_score(record, record2)
                    count += 1
                    break

        return score / (lat_interval * long_interval)


    for j in range(7, 14):
        PERIOD_IN_DAYS = j
        PERIOD_IN_SECONDS = PERIOD_IN_DAYS * 24 * 60 * 60
        temp = []
        for i in range(1, 21):
            t = 1 + (i - 10) / 10
            long_interval = t * long
            lat_interval = t * lat
            temp.append(mark_and_get_accuracy(long_interval, lat_interval))
        plt.plot(temp, label=str(j) + ' days')
        plt.legend()
    plt.xlabel('ellipse linear size coefficient')
    plt.ylabel('accuracy score')
    plt.xticks([i for i in range(20)], [str(1.15 + (i - 10) / 10)[:3] for i in range(20)], rotation=-45)
    plt.grid()
    plt.show()
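
The is_same test in this example is an elliptical gate; a compact
restatement of the condition it checks:

def inside_ellipse(dlong, dlat, a, b):
    # (dlong, dlat) lies inside an axis-aligned ellipse with semi-axes
    # a (longitude) and b (latitude)
    return (dlong / a) ** 2 + (dlat / b) ** 2 <= 1

print(inside_ellipse(3.0, 1.0, 5.0, 2.0))  # True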
Example 9
#!/usr/bin/python3

import json

import common

main_file = open("data/rgofull.json", "r")
merging_file = open("data/rgofull.hsc.json", "r")

lines = main_file.readlines()
records = []

for i in range(len(lines)):
    record = common.Record()
    record.__dict__.update(json.loads(lines[i]))
    records.append(record)

lines = merging_file.readlines()

for i in range(len(lines)):
    record = common.Record()
    record.__dict__.update(json.loads(lines[i]))
    records[i].longtitude = record.longtitude

main_file.close()
merging_file.close()
output_file = open("data/rgofull.hsc.json", "w")

for record in records:
    output_file.write(json.dumps(record.__dict__) + '\n')

output_file.close()