Code Example #1
def getPS1TrainingSetCutouts(opts):

    if type(opts) is dict:
        options = Struct(**opts)
    else:
        options = opts

    import yaml
    with open(options.configFile) as yaml_file:
        config = yaml.load(yaml_file, Loader=yaml.SafeLoader)

    stampLocation = options.stampLocation


    if not os.path.exists(stampLocation):
        os.makedirs(stampLocation)

    username = config['databases']['local']['username']
    password = config['databases']['local']['password']
    database = config['databases']['local']['database']
    hostname = config['databases']['local']['hostname']

    conn = dbConnect(hostname, username, password, database)
    if not conn:
        print("Cannot connect to the database")
        return 1


    images = getTrainingSetImages(conn)

    writePS1GoodBadFiles(options.stampLocation, images)

    conn.close()
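
All of these examples convert the docopt/dict style opts into an attribute-style object via Struct "for compatibility with old optparse code". The helper itself is not shown here; a minimal sketch of what such a utility might look like (the real utils.Struct may differ) is:

class Struct(object):
    """Hypothetical sketch: wrap a plain dict so its keys become attributes,
    mimicking the objects produced by the old optparse-based interface."""
    def __init__(self, **entries):
        self.__dict__.update(entries)

# Example usage (hypothetical option names):
# options = Struct(**{'configFile': 'config.yaml', 'stampLocation': '/tmp/stamps'})
# options.configFile  -> 'config.yaml'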
Code Example #2
def runKerasTensorflowClassifierMultiprocess(opts):

    # Use utils.Struct to convert the dict into an object for compatibility with old optparse code.
    if type(opts) is dict:
        options = Struct(**opts)
    else:
        options = opts

    import yaml
    with open(options.configFile) as yaml_file:
        config = yaml.load(yaml_file, Loader=yaml.SafeLoader)

    username = config['databases']['local']['username']
    password = config['databases']['local']['password']
    database = config['databases']['local']['database']
    hostname = config['databases']['local']['hostname']

    db = []

    conn = dbConnect(hostname, username, password, database)
    if not conn:
        print("Cannot connect to the database")
        return 1

    # If the list isn't specified assume it's the Eyeball List.
    if options.listid is not None:
        try:
            detectionList = int(options.listid)
            if detectionList < 0 or detectionList > 8:
                print ("Detection list must be between 0 and 8")
                return 1
        except ValueError as e:
            sys.exit("Detection list must be an integer")

    # 2018-07-31 KWS We have PS1 data. Don't bother with the HKO/MLO ATLAS data.
    ps1Data = False
    if options.ps1classifier:
        ps1Data = True

    objectList = []
    # if candidates are specified in the options, then override the list.
    if len(options.candidate) > 0:
        objectList = [{'id': int(candidate)} for candidate in options.candidate]
    else:
        objectList = getObjectsByList(conn, database, listId = int(options.listid), ps1Data = ps1Data)


    # 2019-06-07 KWS For reasons not entirely clear, Tensorflow seems to exhaust every last
    #                bit of CPU and memory.  So split the list into sublists (16 bins) if it
    #                contains more than 100 objects.

    if len(objectList) > 100:
        nBins, subLists = splitList(objectList, bins=16)
    else:
        subLists = [objectList]

    for l in subLists:
        currentDate = datetime.datetime.now().strftime("%Y:%m:%d:%H:%M:%S")
        (year, month, day, hour, min, sec) = currentDate.split(':')
        dateAndTime = "%s%s%s_%s%s%s" % (year, month, day, hour, min, sec)

        objectsForUpdate = []

        if len(objectList) > 0:
            # 2019-08-24 KWS Hard-wire the number of workers.
            nProcessors, listChunks = splitList(l, bins=28)

            print ("%s Parallel Processing..." % (datetime.datetime.now().strftime("%Y:%m:%d:%H:%M:%S")))
            objectsForUpdate = parallelProcess(db, dateAndTime, nProcessors, listChunks, worker, miscParameters = [options])
            print ("%s Done Parallel Processing" % (datetime.datetime.now().strftime("%Y:%m:%d:%H:%M:%S")))

            print ("TOTAL OBJECTS TO UPDATE = %d" % len(objectsForUpdate))

    #    if len(objectsForUpdate) > 0 and options.update:
    #        updateObjects(conn, objectsForUpdate)

        # Sort the combined list.
        objectsForUpdate = sorted(objectsForUpdate, key = lambda x: x[1])

        if options.outputcsv is not None:
            with open(options.outputcsv, 'w') as f:
                for row in objectsForUpdate:
                    print(row[0], row[1])
                    f.write('%s,%f\n' % (row[0], row[1]))

        if options.update:
            for row in objectsForUpdate:
                updateTransientRBValue(conn, row[0], row[1], ps1Data = ps1Data)

    conn.close()
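
The multiprocess variant relies on splitList to break the object list into roughly equal chunks and on parallelProcess to hand those chunks to worker processes. Neither helper is defined in this excerpt; a minimal sketch of the chunking step, assuming splitList returns the number of chunks actually produced together with the chunks themselves, could be:

import math

def splitList(objectList, bins=28):
    """Hypothetical sketch: split objectList into at most `bins` roughly
    equal chunks and return (numberOfChunks, chunks)."""
    if not objectList:
        return 0, []
    chunkSize = int(math.ceil(len(objectList) / float(bins)))
    chunks = [objectList[i:i + chunkSize]
              for i in range(0, len(objectList), chunkSize)]
    return len(chunks), chunks

# nProcessors, listChunks = splitList(objectList, bins=28)
# Each chunk can then be passed to one worker process by parallelProcess().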
Code Example #3
def runKerasTensorflowClassifier(opts, processNumber=None):

    # Use utils.Struct to convert the dict into an object for compatibility with old optparse code.
    if type(opts) is dict:
        options = Struct(**opts)
    else:
        options = opts

    import yaml
    with open(options.configFile) as yaml_file:
        config = yaml.load(yaml_file, Loader=yaml.SafeLoader)

    username = config['databases']['local']['username']
    password = config['databases']['local']['password']
    database = config['databases']['local']['database']
    hostname = config['databases']['local']['hostname']

    conn = dbConnect(hostname, username, password, database)
    if not conn:
        print("Cannot connect to the database")
        return 1

    # 2018-07-31 KWS We have PS1 data. Don't bother with the HKO/MLO ATLAS data.
    ps1Data = False
    if options.ps1classifier:
        ps1Data = True

    if options.listid is not None:
        try:
            detectionList = int(options.listid)
            if detectionList < 0 or detectionList > 8:
                print("Detection list must be between 0 and 8")
                return 1
        except ValueError as e:
            sys.exit("Detection list must be an integer")

    objectList = []
    imageFilenames = []

    # if candidates are specified in the options, then override the list.
    if len(options.candidate) > 0:
        objectList = [{
            'id': int(candidate)
        } for candidate in options.candidate]
    else:
        # Only collect by the list ID if we are running in single threaded mode
        if processNumber is None:
            objectList = getObjectsByList(conn,
                                          database,
                                          listId=int(options.listid),
                                          ps1Data=ps1Data)

    if len(objectList) > 0:
        imageFilenames = getImages(conn,
                                   database,
                                   objectList,
                                   imageRoot=options.imageroot)
        if len(imageFilenames) == 0:
            print("NO IMAGES")
            conn.close()
            return []

    if ps1Data:
        objectDictPS1 = getRBValues([f['filename'] for f in imageFilenames],
                                    options.ps1classifier,
                                    extension=1)
        objectScores = defaultdict(dict)
        for k, v in list(objectDictPS1.items()):
            objectScores[k]['ps1'] = np.array(v)
        finalScores = {}

        objects = list(objectScores.keys())
        for object in objects:
            finalScores[object] = np.median(objectScores[object]['ps1'])
    else:
        # Split the images into HKO and MLO data so we can apply the HKO and MLO machines separately.
        hkoFilenames = []
        for row in imageFilenames:
            if '02a' in row['filename']:
                hkoFilenames.append(row['filename'])
        mloFilenames = []
        for row in imageFilenames:
            if '01a' in row['filename']:
                mloFilenames.append(row['filename'])

        #filename = 'hko_57966_20x20_skew3_signpreserve_f77475b232425.mat'
        #train_data, test_data, image_dim = load_data(filename)
        #x_test = test_data[0]

        #hkoClassifier = '/home/kws/keras/hko_57966_20x20_skew3_signpreserve_f77475b232425.model.best.hdf5'
        #mloClassifier = '/home/kws/keras/atlas_mlo_57925_20x20_skew3_signpreserve_f331184b993662.model.best.hdf5'

        objectDictHKO = getRBValues(hkoFilenames, options.hkoclassifier)
        objectDictMLO = getRBValues(mloFilenames, options.mloclassifier)

        # Now we have two dictionaries. Combine them.

        objectScores = defaultdict(dict)

        for k, v in list(objectDictHKO.items()):
            objectScores[k]['hko'] = np.array(v)
        for k, v in list(objectDictMLO.items()):
            objectScores[k]['mlo'] = np.array(v)

        # Some objects will have data from two telescopes, some only one.
        # If we have data from two telescopes, choose the median value of the longest length list.

        finalScores = {}

        objects = list(objectScores.keys())
        for object in objects:
            if len(objectScores[object]) > 1:
                hkoLen = len(objectScores[object]['hko'])
                mloLen = len(objectScores[object]['mlo'])
                # Use MLO only if it has more measurements than HKO; otherwise use HKO.
                if mloLen > hkoLen:
                    finalScores[object] = np.median(
                        objectScores[object]['mlo'])
                else:
                    finalScores[object] = np.median(
                        objectScores[object]['hko'])

            else:
                try:
                    finalScores[object] = np.median(
                        objectScores[object]['hko'])
                except KeyError as e:
                    finalScores[object] = np.median(
                        objectScores[object]['mlo'])

    finalScoresSorted = OrderedDict(
        sorted(list(finalScores.items()), key=lambda t: t[1]))

    if options.outputcsv is not None:
        prefix = options.outputcsv.split('.')[0]
        suffix = options.outputcsv.split('.')[-1]

        if suffix == prefix:
            suffix = ''

        if suffix:
            suffix = '.' + suffix

        processSuffix = ''

        if processNumber is not None:
            processSuffix = '_%02d' % processNumber

        # Generate the insert statements
        with open('%s%s%s' % (prefix, processSuffix, suffix), 'w') as f:
            for k, v in list(finalScoresSorted.items()):
                print(k, finalScoresSorted[k])
                f.write('%s,%f\n' % (k, finalScoresSorted[k]))

    scores = list(finalScoresSorted.items())

    if options.update and processNumber is None:
        # Only allow database updates in single threaded mode. Otherwise multithreaded code
        # does the updates at the end of processing. (Minimises table locks.)
        for row in scores:
            updateTransientRBValue(conn, row[0], row[1], ps1Data=ps1Data)

    conn.close()

    return scores
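
The per-object score combination used above (take the median of whichever telescope contributed more measurements, falling back to the one that exists) can be factored into a small helper. A sketch of that rule, with hypothetical names, would be:

import numpy as np

def combineScores(hkoScores=None, mloScores=None):
    """Hypothetical sketch of the combination rule above: if both telescopes
    contributed scores, return the median of the longer list; otherwise
    return the median of whichever list is present."""
    if hkoScores is not None and mloScores is not None:
        scores = mloScores if len(mloScores) > len(hkoScores) else hkoScores
    else:
        scores = hkoScores if hkoScores is not None else mloScores
    return float(np.median(scores))

# combineScores(hkoScores=[0.1, 0.2, 0.9], mloScores=[0.3, 0.4]) -> 0.2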
Code Example #4
def getATLASTrainingSetCutouts(opts):
    if type(opts) is dict:
        options = Struct(**opts)
    else:
        options = opts

    import yaml
    with open(options.configFile) as yaml_file:
        config = yaml.load(yaml_file, Loader=yaml.SafeLoader)

    stampSize = int(options.stampSize)
    mjds = options.mjds
    if not mjds:
        print("No MJDs specified")
        return 1

    downloadThreads = int(options.downloadthreads)
    stampThreads = int(options.stampThreads)
    stampLocation = options.stampLocation
    if not os.path.exists(stampLocation):
        os.makedirs(stampLocation)
    username = config['databases']['local']['username']
    password = config['databases']['local']['password']
    database = config['databases']['local']['database']
    hostname = config['databases']['local']['hostname']

    conn = dbConnect(hostname, username, password, database)
    if not conn:
        print("Cannot connect to the database")
        return 1

    currentDate = datetime.datetime.now().strftime("%Y:%m:%d:%H:%M:%S")
    (year, month, day, hour, min, sec) = currentDate.split(':')
    dateAndTime = "%s%s%s_%s%s%s" % (year, month, day, hour, min, sec)

    asteroidExpsDict = defaultdict(list)
    for mjd in mjds:
        asteroidExps = getKnownAsteroids(conn,
                                         options.camera,
                                         int(mjd),
                                         pkn=900)
        for exp in asteroidExps:
            asteroidExpsDict[exp['obs']].append(exp)

    # Now create the files.  We need to have x, y as the first two items.

    #m.obs, d.x, d.y, d.mag, d.dmag, d.ra, d.dec
    header = "x,y,mag,dmag,ra,dec,obs".split(',')

    exposureList = []
    for k, v in asteroidExpsDict.items():
        exposureList.append(k)
        with open(stampLocation + '/' + 'good' + k + '.txt', 'w') as csvfile:
            w = csv.DictWriter(csvfile, fieldnames=header, delimiter=' ')
            #w.writeheader()
            for row in v:
                w.writerow(row)
    # So now let stampstorm do its stuff

    if len(exposureList) > 0:
        nProcessors, listChunks = splitList(exposureList, bins=stampThreads)

        print("%s Parallel Processing Good objects..." %
              (datetime.datetime.now().strftime("%Y:%m:%d:%H:%M:%S")))
        parallelProcess([],
                        dateAndTime,
                        nProcessors,
                        listChunks,
                        workerStampStorm,
                        miscParameters=[stampSize, stampLocation, 'good'],
                        drainQueues=False)
        print("%s Done Parallel Processing" %
              (datetime.datetime.now().strftime("%Y:%m:%d:%H:%M:%S")))

    junkExpsDict = defaultdict(list)
    for mjd in mjds:
        junkExps = getJunk(conn, options.camera, int(mjd))
        for exp in junkExps:
            junkExpsDict[exp['obs']].append(exp)

    exposureList = []
    for k, v in junkExpsDict.items():
        exposureList.append(k)
        with open(stampLocation + '/' + 'bad' + k + '.txt', 'w') as csvfile:
            w = csv.DictWriter(csvfile, fieldnames=header, delimiter=' ')
            #w.writeheader()
            for row in v:
                w.writerow(row)

    if len(exposureList) > 0:
        nProcessors, listChunks = splitList(exposureList, bins=stampThreads)

        print("%s Parallel Processing Bad objects..." %
              (datetime.datetime.now().strftime("%Y:%m:%d:%H:%M:%S")))
        parallelProcess([],
                        dateAndTime,
                        nProcessors,
                        listChunks,
                        workerStampStorm,
                        miscParameters=[stampSize, stampLocation, 'bad'],
                        drainQueues=False)
        print("%s Done Parallel Processing" %
              (datetime.datetime.now().strftime("%Y:%m:%d:%H:%M:%S")))

    conn.close()
    getGoodBadFiles(stampLocation)
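
The final call to getGoodBadFiles is not shown in this example. Assuming its job is simply to index the stamps that stampstorm produced, a minimal sketch (the real routine may record different metadata or file patterns) might be:

import glob
import os

def getGoodBadFiles(stampLocation):
    """Hypothetical sketch: write good.txt and bad.txt listing the cutout
    files produced under stampLocation, one filename per line."""
    for label in ('good', 'bad'):
        stamps = sorted(glob.glob(os.path.join(stampLocation, '%s*.fits' % label)))
        with open(os.path.join(stampLocation, '%s.txt' % label), 'w') as f:
            for stamp in stamps:
                f.write('%s\n' % stamp)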
Code Example #5
def main(argv=None):
    opts = docopt(__doc__, version='0.1')
    opts = cleanOptions(opts)

    # Use utils.Struct to convert the dict into an object for compatibility with old optparse code.
    options = Struct(**opts)

    json_path = options.data
    schema_files = options.schema
    cutoutsci_path = options.cutoutSci
    cutouttemp_path = options.cutoutTemp
    cutoutdiff_path = options.cutoutDiff
    mjdThreshold = float(options.mjdThreshold)

    alert_schema = combine_schemas(schema_files)

    import yaml
    with open(options.configfile) as yaml_file:
        config = yaml.load(yaml_file, Loader=yaml.SafeLoader)

    username = config['databases']['local']['username']
    password = config['databases']['local']['password']
    database = config['databases']['local']['database']
    hostname = config['databases']['local']['hostname']

    # Now that we have all the data, we need to construct a properly formed alert - FOR EACH ROW.
    # NOTE - To get this to work, feed it junk test.json data.

    # The alerts can be written to a file - but don't forget, we are using the schemaless writer
    # which means that the schemaless reader MUST be used to read the data!
    alert = None

    # If we just have some json data, process it.  Otherwise read from the database.

    if json_path:
        with open(json_path) as file_text:
            json_data = json.load(file_text)

        # Load science stamp if included
        if cutoutsci_path is not None:
            cutoutScience = load_stamp(cutoutsci_path)
            json_data['cutoutScience'] = cutoutScience

        # Load template stamp if included
        if cutouttemp_path is not None:
            cutoutTemplate = load_stamp(cutouttemp_path)
            json_data['cutoutTemplate'] = cutoutTemplate

        # Load difference stamp if included
        if cutoutdiff_path is not None:
            cutoutDifference = load_stamp(cutoutdiff_path)
            json_data['cutoutDifference'] = cutoutDifference

        # Serialise only after the optional cutouts have been attached,
        # otherwise they would be missing from the Avro message.
        avro_bytes = write_avro_data(json_data, alert_schema)

        if options.writeFile:
            with open('/tmp/alert.avro', 'wb') as f:
                # NOTE - This code writes a schemaless message. To read it we need to pass the schema
                #        to the reader. How we pass this message to Kafka is the next problem to be
                #        resolved.
                avro_bytes.seek(0)
                data = avro_bytes.read()
                f.write(data)

        #m = read_avro_data_from_file('alert.avro', alert_schema)
        m = read_avro_data(avro_bytes, alert_schema)
        if options.readMessage:
            if m:
                # Print message text to screen
                message_text = {
                    k: m[k]
                    for k in m if k not in
                    ['cutoutScience', 'cutoutDifference', 'cutoutTemplate']
                }
                print(message_text)

                # Collect stamps as files written to local directory 'output' and check hashes match expected
                if m.get('cutoutScience') is not None:
                    stamp_sci_out = write_stamp_file(
                        m.get('cutoutScience'), 'output')
                    print('Science stamp ok:',
                          check_md5(cutoutsci_path, stamp_sci_out))

                if m.get('cutoutTemplate') is not None:
                    stamp_temp_out = write_stamp_file(
                        m.get('cutoutTemplate'), 'output')
                    print('Template stamp ok:',
                          check_md5(cutouttemp_path, stamp_temp_out))

                if m.get('cutoutDifference') is not None:
                    stamp_diff_out = write_stamp_file(
                        m.get('cutoutDifference'), 'output')

                print("size in bytes of json text: %d" %
                      sys.getsizeof(message_text))
                raw_bytes = avro_bytes.getvalue()
                print("size in bytes of avro message: %d" %
                      sys.getsizeof(raw_bytes))

                print("size in bytes of json text: %d" %
                      sys.getsizeof(message_text))
                raw_bytes = avro_bytes.getvalue()
                print("size in bytes of avro message: %d" %
                      sys.getsizeof(raw_bytes))
        return 0

    conn = dbConnect(hostname, username, password, database)
    if not conn:
        print("Cannot connect to the database")
        return 1

    # Connect to the database and read out the ATLAS detections.
    records = getATLASIngestedDetections(conn, mjdThreshold)

    conn.close()

    alerts = []
    if options.writeFile and options.bulkMessage:
        for row in records:
            alert = {
                'alertId': row['db_det_id'],
                'atlas_object_id': row['atlas_object_id'],
                'candidate': row
            }
            alerts.append(alert)
        write_avro_data_to_file_with_schema('/tmp/alerts_bulk.avro',
                                            alert_schema, alerts)
        return

    for row in records:
        alert = {
            'alertId': row['db_det_id'],
            'atlas_object_id': row['atlas_object_id'],
            'candidate': row
        }

        avro_bytes = write_avro_data(alert, alert_schema)

        if options.readMessage:
            #m = read_avro_data_from_file('alert.avro', alert_schema)
            m = read_avro_data(avro_bytes, alert_schema)
            if m:
                # Print message text to screen
                message_text = {
                    k: m[k]
                    for k in m if k not in
                    ['cutoutScience', 'cutoutDifference', 'cutoutTemplate']
                }
                print(message_text)

                # Collect stamps as files written to local directory 'output' and check hashes match expected
                if m.get('cutoutScience') is not None:
                    stamp_sci_out = write_stamp_file(
                        m.get('cutoutScience'), 'output')
                    print('Science stamp ok:',
                          check_md5(cutoutsci_path, stamp_sci_out))

                if m.get('cutoutTemplate') is not None:
                    stamp_temp_out = write_stamp_file(
                        m.get('cutoutTemplate'), 'output')
                    print('Template stamp ok:',
                          check_md5(cutouttemp_path, stamp_temp_out))

                if m.get('cutoutDifference') is not None:
                    stamp_diff_out = write_stamp_file(
                        m.get('cutoutDifference'), 'output')

                print("size in bytes of json text: %d" %
                      sys.getsizeof(message_text))
                raw_bytes = avro_bytes.getvalue()
                print("size in bytes of avro message: %d" %
                      sys.getsizeof(raw_bytes))

        if options.writeFile:
            if not options.bulkMessage:
                f = open('/tmp/alert_%s.avro' % row['db_det_id'], 'wb')
            avro_bytes.seek(0)
            data = avro_bytes.read()
            f.write(data)
            if not options.bulkMessage:
                f.close()

    return 0
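
The comment in main notes that the alerts are produced with a schemaless Avro writer, so they must be decoded with the schemaless reader. write_avro_data and read_avro_data are not defined here; a plausible sketch built on fastavro (an assumption, the project may use a different Avro library) is:

import io
import fastavro

def write_avro_data(record, schema):
    """Hypothetical sketch: serialise one record with the schemaless writer
    and return the bytes buffer."""
    buf = io.BytesIO()
    fastavro.schemaless_writer(buf, schema, record)
    return buf

def read_avro_data(avro_bytes, schema):
    """Hypothetical sketch: decode a schemaless message; the same schema
    used for writing must be supplied."""
    avro_bytes.seek(0)
    return fastavro.schemaless_reader(avro_bytes, schema)

# schema = fastavro.schema.load_schema('alert.avsc')  # hypothetical schema file
# buf = write_avro_data({'alertId': 1}, schema)
# record = read_avro_data(buf, schema)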
Code Example #6
File: poll_tns.py  Project: lsst-uk/lasair-lsst
def getTNSData(opts):
    if type(opts) is dict:
        options = Struct(**opts)
    else:
        options = opts

#    import yaml
#    with open(options.configFile) as yaml_file:
#        config = yaml.load(yaml_file)

    username = settings.DB_USER_WRITE
    password = settings.DB_PASS_WRITE
    hostname = settings.DB_HOST
    database = 'ztf'
    #    database = settings.DB

    #    username = config['databases']['local']['username']
    #    password = config['databases']['local']['password']
    #    database = config['databases']['local']['database']
    #    hostname = config['databases']['local']['hostname']

    conn = dbConnect(hostname, username, password, database)
    if not conn:
        print(
            "ERROR in services/TNS/poll_tns: Cannot connect to the database\n")
        return 1

    radius = 3.0  # arcseconds from crossmatch
    if options.radius:
        radius = float(options.radius)

    inLastNumberOfDays = None
    if options.inLastNumberOfDays:
        inLastNumberOfDays = int(options.inLastNumberOfDays)

    status_code, content = pollTNS(page=int(options.pageNumber),
                                   resultSize=int(options.pageSize),
                                   inLastNumberDays=inLastNumberOfDays)

    #    csvEntries = csv.DictReader(content.splitlines(), delimiter=',')
    #    data = csvEntries
    data = csv.DictReader(content.splitlines(), delimiter=',')
    rowsAdded = 0
    rowsChanged = 0
    for row in data:
        if 'Name' in row:
            name = row['Name'].strip().split()
        else:
            print(row)
            sys.exit()

        if len(name) != 2:
            prefix = 'SN'
            suffix = name[0]
        else:
            prefix = name[0]
            suffix = name[1]

        if row['Discovery Date (UT)'].strip() == '0000-00-00 00:00:00':
            # Set the discovery date to January of the suffix name.
            discoveryDate = '%s-01-01 00:00:00' % suffix[0:4]
            row['Discovery Date (UT)'] = discoveryDate


#        if not 'Type' in row:  # sometimes TNS does not supply Type -- RDW
#            row['Type'] = 'null'

        row['prefix'] = prefix
        row['suffix'] = suffix
        ra, dec = coords_sex_to_dec(row['RA'], row['DEC'])
        if ra == 0 and dec == 0:
            print(
                "in services/TNS/poll_tns: Cannot store record for %s. No coordinates provided!\n"
                % row['Name'].strip())
            continue

        row['ra'] = ra
        row['dec'] = dec
        htm16 = htmCircle.htmID(16, ra, dec)
        row['htm16'] = htm16
        tnsEntry = getTNSRow(conn, suffix)
        if tnsEntry:
            if tnsEntry['tns_prefix'] != prefix:
                # The entry has been updated on TNS - classified! Otherwise do nothing!
                deleteTNSRow(conn, suffix)
                insertTNS(conn, row)
                print("Object %s has been updated\n" % row['suffix'])
                rowsChanged += 1
        else:
            insertTNS(conn, row)
            print("Object %s has been added\n" % row['suffix'])
            run_tns_crossmatch.tns_name_crossmatch(\
                    conn, row['suffix'], ra, dec, radius)

            rowsAdded += 1
        #print prefix, suffix, ra, dec, htm16, row['Discovery Date (UT)']

    print("Total rows added = %d, modified = %d\n" % (rowsAdded, rowsChanged))

    conn.commit()
    conn.close()
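
coords_sex_to_dec converts the sexagesimal RA/DEC strings returned by the TNS into decimal degrees. Its implementation is not shown; one way to sketch it, assuming astropy is available (the project may use its own parser), is:

from astropy.coordinates import SkyCoord
import astropy.units as u

def coords_sex_to_dec(ra_sex, dec_sex):
    """Hypothetical sketch: convert sexagesimal coordinates, e.g.
    ('12:34:56.7', '-01:02:03.4'), to decimal degrees."""
    try:
        c = SkyCoord(ra=ra_sex, dec=dec_sex, unit=(u.hourangle, u.deg))
    except ValueError:
        # Mirror the behaviour the caller checks for: (0, 0) means "no coordinates".
        return 0.0, 0.0
    return c.ra.degree, c.dec.degree

# coords_sex_to_dec('00:00:30.0', '+10:00:00') -> (0.125, 10.0)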