Esempio n. 1
0
def get_jenni(config_path):
    """Load the configuration found at ``config_path`` and create a Watcher.

    A ``configs.Configs`` object is built from a one-element list holding
    ``config_path`` and asked to load modules from an (empty) list of config
    files.  Afterwards a ``Watcher`` is instantiated; any exception raised
    while doing so is reported on stderr as a warning instead of being
    propagated.

    Args:
        config_path: Configuration path handed to ``configs.Configs``.

    Returns:
        None.
    """
    config_files = []

    config = configs.Configs([config_path])
    config.load_modules(config_files)

    try:
        Watcher()
    except Exception as e:
        # Deliberately best-effort: a failing Watcher only produces a warning.
        # ``except ... as`` and ``sys.stderr.write`` are valid on both
        # Python 2.6+ and Python 3, unlike the former ``except X, e`` /
        # ``print >> sys.stderr`` forms; the emitted text is unchanged.
        sys.stderr.write('Warning: %s (in __init__.py)\n' % (e,))
def generate_vocab(data_path):
    """Build a character vocabulary from the train and valid files.

    Every file matching ``data_path + 'train/*'`` and
    ``data_path + 'valid/*'`` is read as UTF-8 text and its characters are
    counted.  Characters seen at least 100 times are kept.  The special
    tokens ``W_PAD``, ``W_UNK`` and ``W_EOS`` taken from the configuration
    are registered first with a fixed frequency of 10000, followed by the
    kept characters in order of first appearance.  The resulting vocabulary
    size is printed.

    Args:
        data_path: Prefix that is concatenated directly with ``'train/*'``
            and ``'valid/*'``, so it has to end with a path separator.

    Returns:
        None.
    """
    corpus_files = glob.glob(data_path + 'train/*')
    corpus_files = corpus_files + glob.glob(data_path + 'valid/*')

    cfgs = configs.Configs()

    # Count how often every single character occurs over the whole corpus.
    char_counts = {}
    for path in tqdm.tqdm(corpus_files):
        with open(path, 'r', encoding='utf-8') as handle:
            text = handle.read()
        for ch in text:
            if ch in char_counts:
                char_counts[ch] += 1
            else:
                char_counts[ch] = 1

    # Drop rare characters (fewer than 100 occurrences).
    frequent = {}
    for ch, count in char_counts.items():
        if count >= 100:
            frequent[ch] = count

    dic, w2i, i2w, w2w = {}, {}, {}, {}

    def register(token, freq):
        # The next free id is the number of tokens registered so far.
        idx = len(dic)
        w2i[token] = idx
        i2w[idx] = token
        dic[token] = freq
        w2w[token] = token

    # Special tokens come first and get a fixed frequency.
    for special in (cfgs.W_PAD, cfgs.W_UNK, cfgs.W_EOS):
        register(special, 10000)

    for ch, count in frequent.items():
        if ch not in dic:
            register(ch, count)

    # Tokens ordered from most to least frequent.
    by_freq = sorted(dic.items(), key=operator.itemgetter(1), reverse=True)
    hfw = [entry[0] for entry in by_freq]

    assert len(hfw) == len(dic)
    assert len(w2i) == len(dic)
    print("dump dict...")
    print(len(w2i))
Esempio n. 3
0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Handle connections to WMS API"""

import ssl
import urllib2
import urlparse
import logging
import configs

# Path of the WMS configuration file handed to configs.Configs below.
CONFIG_FILE = "/opt/google/gehttpd/wsgi-bin/wms/ogc/wms.cfg"
# Module-wide configuration object, created once when this module is
# imported; it is queried through GetStr()/GetBool() when handling connections.
CONFIGS = configs.Configs(CONFIG_FILE)

# Logger used by this module, registered under the name "wms_maps".
logger = logging.getLogger("wms_maps")


def HandleConnection(url):
    logger.debug("Opening url: [%s]", url)

    if CONFIGS.GetStr("DATABASE_HOST") != "":
        url = CONFIGS.GetStr("DATABASE_HOST") + urlparse.urlsplit(url)[2:]

    fp = None
    try:
        # Set the context based on cert requirements
        if CONFIGS.GetBool("VALIDATE_CERTIFICATE"):
            cert_file = CONFIGS.GetStr("CERTIFICATE_CHAIN_PATH")
Esempio n. 4
0
                self.pp_dir_sa, npz_dir),
                            shell=True)


# Script entry point: parse the command line, load the configuration and
# assemble the list of patient directories to exclude.
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    # Optional upper bound on the number of patients; None when not given.
    parser.add_argument('-n',
                        '--number',
                        type=int,
                        default=None,
                        help='How many patients to maximally process.')
    args = parser.parse_args()
    # Start timestamp of the whole run.
    total_stime = time.time()

    # Imported here so the configs module is only needed when run as a script.
    import configs
    cf = configs.Configs()

    # Analysis finding: the following patients have unclear annotations. Some raters gave more than one judgement
    # on the same ROI.
    patients_to_exclude = [
        "0137a", "0404a", "0204a", "0252a", "0366a", "0863a", "0815a", "0060a",
        "0249a", "0436a", "0865a"
    ]
    # Further finding: the following patients contain nodules with segmentation-label inconsistencies.
    # Running Preprocessor.verify_seg_label_pairings() produces a data frame with detailed findings.
    patients_to_exclude += ["0305a", "0447a"]
    # One path per excluded patient id, located below the raw data directory.
    exclude_paths = [
        os.path.join(cf.raw_data_dir, pid) for pid in patients_to_exclude
    ]
    # These pids are automatically found and excluded when setting exclude_inconsistents=True at Preprocessor
    # initialization instead of passing the pre-compiled list.
Esempio n. 5
0
                rel_dir = os.path.relpath(out_dir,
                                          self.cf.pp_rootdir).split(os.sep)
                npz_out_dir = os.path.join(self.cf.pp_npz_dir,
                                           str(os.sep).join(rel_dir))
                print("npz out dir: ", npz_out_dir)
                os.makedirs(npz_out_dir, exist_ok=True)
                group_df.to_pickle(os.path.join(npz_out_dir, 'info_df.pickle'))
                dmanager.pack_dataset(out_dir,
                                      npz_out_dir,
                                      recursive=True,
                                      verbose=False)
        else:
            print(
                "Did not convert .npy-files to .npz because npz directory not set in configs."
            )


if __name__ == '__main__':
    # Script entry point: load the configuration, run the toy generator's
    # create_sets() and convert_copy_npz() steps, then report the runtime.
    import configs as cf
    cf = cf.Configs()
    total_stime = time.time()

    generator = ToyGenerator(cf)
    generator.create_sets()
    generator.convert_copy_npz()

    # Break the elapsed wall-clock seconds down into hours/minutes/seconds.
    elapsed = time.time() - total_stime
    total_mins, secs = divmod(elapsed, 60)
    h, mins = divmod(total_mins, 60)
    runtime = "{:d}h:{:02d}m:{:02d}s".format(int(h), int(mins), int(secs))
    script_name = os.path.basename(__file__)
    print("{} total runtime: {}".format(script_name, runtime))